In [None]:
# Importing necessary libraries

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score,roc_auc_score

#Importing library for visualization
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Silence Python warnings for the rest of the session to keep cell output clean.
# NOTE(review): this is a global filter, so warnings emitted by later cells
# (e.g. during model fitting) are hidden as well — confirm that is intended.
import warnings
warnings.simplefilter("ignore")

In [None]:
# Load the train and test CSVs from the working directory, then preview
# the first rows of the training data.
train, test = (
    pd.read_csv(csv_path) for csv_path in ("blood-train.csv", "blood-test.csv")
)
train.head()

Unnamed: 0.1,Unnamed: 0,Hospital_name,Blood_group,Months since Last Donation,Number of Donations,City,Total Volume Donated (c.c.),Months since First Donation,Made Donation in March 2007
0,619,Hospital 1,O+,2,50,Tenkasi,12500,98,1
1,664,Hospital 2,O-,0,13,Theni,3250,28,1
2,441,Hospital 3,B+,1,16,Tiruppur,4000,35,1
3,160,Hospital 4,B-,2,20,Coimbatore,5000,45,1
4,358,Hospital 5,A+,1,24,Chennai,6000,77,0


In [None]:
# Report (rows, columns) for both data sets
for label, frame in (("Train Shape : ", train), ("Test Shape : ", test)):
    print(label, frame.shape)

Train Shape :  (576, 9)
Test Shape :  (200, 8)


In [None]:
# Print a concise summary of the training DataFrame
# (column names, non-null counts and dtypes)
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 576 entries, 0 to 575
Data columns (total 9 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   Unnamed: 0                   576 non-null    int64 
 1   Hospital_name                576 non-null    object
 2   Blood_group                  576 non-null    object
 3   Months since Last Donation   576 non-null    int64 
 4   Number of Donations          576 non-null    int64 
 5   City                         576 non-null    object
 6   Total Volume Donated (c.c.)  576 non-null    int64 
 7   Months since First Donation  576 non-null    int64 
 8   Made Donation in March 2007  576 non-null    int64 
dtypes: int64(6), object(3)
memory usage: 40.6+ KB


In [None]:
# Shorten the label column name to 'Target' for brevity
train = train.rename(columns={'Made Donation in March 2007': 'Target'})

In [None]:
# Class balance of the label: rows per Target value (0 / 1)
train.Target.value_counts()

0    438
1    138
Name: Target, dtype: int64

In [None]:
# Preview the first rows of the test set
test.head()

Unnamed: 0.1,Unnamed: 0,Hospital_name,Blood_group,Months since Last Donation,Number of Donations,City,Total Volume Donated (c.c.),Months since First Donation
0,659,Hospital 1,O+,2,12,Tenkasi,3000,52
1,276,Hospital 2,O-,21,7,Theni,1750,38
2,263,Hospital 3,B+,4,1,Tiruppur,250,4
3,303,Hospital 4,B-,11,11,Coimbatore,2750,38
4,83,Hospital 5,A+,4,12,Chennai,3000,34


In [None]:
# Print a concise summary of the test DataFrame
# (column names, non-null counts and dtypes)
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 8 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   Unnamed: 0                   200 non-null    int64 
 1   Hospital_name                200 non-null    object
 2   Blood_group                  200 non-null    object
 3   Months since Last Donation   200 non-null    int64 
 4   Number of Donations          200 non-null    int64 
 5   City                         200 non-null    object
 6   Total Volume Donated (c.c.)  200 non-null    int64 
 7   Months since First Donation  200 non-null    int64 
dtypes: int64(5), object(3)
memory usage: 12.6+ KB


In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max)
# for the numeric columns of the training data
train.describe()

Unnamed: 0.1,Unnamed: 0,Months since Last Donation,Number of Donations,Total Volume Donated (c.c.),Months since First Donation,Target
count,576.0,576.0,576.0,576.0,576.0,576.0
mean,374.034722,9.439236,5.427083,1356.770833,34.050347,0.239583
std,216.947773,8.175454,5.74001,1435.002556,24.227672,0.4272
min,0.0,0.0,1.0,250.0,2.0,0.0
25%,183.75,2.0,2.0,500.0,16.0,0.0
50%,375.5,7.0,4.0,1000.0,28.0,0.0
75%,562.5,14.0,7.0,1750.0,49.25,0.0
max,747.0,74.0,50.0,12500.0,98.0,1.0


In [None]:
# Import train_test_split method
from sklearn.model_selection import train_test_split

# Split the training DataFrame into X_train, X_test, y_train and y_test,
# holding out 20% of the rows for evaluation.
# The split is stratified on the `Target` column so that both parts keep the
# same class proportions (the label is imbalanced: 438 zeros vs 138 ones).
# 'Unnamed: 0' and the text columns are dropped, so only the numeric
# donation columns are used as features.
X_train, X_test, y_train, y_test = train_test_split(
    train.drop(columns=['Target', 'Unnamed: 0', 'Hospital_name', 'Blood_group', 'City']),
    train.Target,
    test_size=0.2,
    stratify=train.Target,
    random_state=0)

In [None]:
%pip install tpot




In [None]:
# Import TPOTClassifier and roc_auc_score
from tpot import TPOTClassifier
from sklearn.metrics import roc_auc_score

# Configure the TPOT search: 5 generations of 20 pipelines each, scored by
# ROC AUC, using the 'TPOT light' configuration and a fixed seed.
tpot = TPOTClassifier(
    generations=5,
    population_size=20,
    verbosity=2,
    scoring='roc_auc',
    random_state=42,
    disable_update_check=True,
    config_dict='TPOT light',
)
tpot.fit(X_train, y_train)

# Hold-out AUC, computed from the predicted probability of the positive class
positive_class_proba = tpot.predict_proba(X_test)[:, 1]
tpot_auc_score = roc_auc_score(y_test, positive_class_proba)
print(f'\nAUC score: {tpot_auc_score:.4f}')


# List the steps of the best fitted pipeline, numbered from 1
print('\nBest pipeline steps:', end='\n')
for idx, step in enumerate(tpot.fitted_pipeline_.steps, start=1):
    # Each step is a (name, transform) pair; only the transform is shown
    print(f'{idx}. {step[1]}')


Optimization Progress:   0%|          | 0/120 [00:00<?, ?pipeline/s]


Generation 1 - Current best internal CV score: 0.7355558350100603

Generation 2 - Current best internal CV score: 0.7355558350100603

Generation 3 - Current best internal CV score: 0.7355558350100603

Generation 4 - Current best internal CV score: 0.7355558350100603

Generation 5 - Current best internal CV score: 0.7355558350100603

Best pipeline: LogisticRegression(input_matrix, C=25.0, dual=False, penalty=l2)

AUC score: 0.8042

Best pipeline steps:
1. LogisticRegression(C=25.0, random_state=42)


In [None]:
# prompt: give accuracy for randomforest

from sklearn.ensemble import RandomForestClassifier
# Create a Random Forest classifier with 100 trees.
# The fixed random_state makes the fitted forest — and therefore the
# accuracy printed below — reproducible from run to run.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
# Fit the model on the training data
rf.fit(X_train, y_train)
# Make predictions on the held-out data
y_pred = rf.predict(X_test)
# Evaluate the model: fraction of held-out rows classified correctly
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.7155172413793104


In [None]:
# prompt: give accuracy for naive bayes

from sklearn.naive_bayes import GaussianNB
# Build and fit a Gaussian Naive Bayes classifier in one step
# (fit returns the estimator itself)
nb = GaussianNB().fit(X_train, y_train)
# Predict the held-out rows and report the share classified correctly
y_pred = nb.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.75


In [None]:
%pip install scikit-learn



In [None]:
# prompt: give accuracy for logistic regression
from sklearn.linear_model import LogisticRegression
# Build a Logistic Regression classifier with default settings and fit it
# on the training data (fit returns the estimator itself)
lr = LogisticRegression().fit(X_train, y_train)
# Predict the held-out rows and report the share classified correctly
y_pred = lr.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.7413793103448276


In [None]:
!pip install gradio


Collecting typing-extensions~=4.0 (from gradio)
  Using cached typing_extensions-4.9.0-py3-none-any.whl (32 kB)
Installing collected packages: typing-extensions
  Attempting uninstall: typing-extensions
    Found existing installation: typing_extensions 4.5.0
    Uninstalling typing_extensions-4.5.0:
      Successfully uninstalled typing_extensions-4.5.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-probability 0.22.0 requires typing-extensions<4.6.0, but you have typing-extensions 4.9.0 which is incompatible.[0m[31m
[0mSuccessfully installed typing-extensions-4.9.0


In [None]:
# prompt: Create a Gradio UI that takes the user's name, a dropdown for blood group,
# and a dropdown for city (Tenkasi, Theni, Coimbatore, Trichy, Dindigul, Tiruppur,
# Thanjavur, Kanyakumari, Chennai, Thirunelveli, Madurai), plus a submit button.
# When submit is clicked, look up the hospitals in the selected city whose blood group
# matches the selected one and list their names. If no hospital in that city has the
# blood group, list the hospital names ordered by "Number of Donations" in descending
# order. All data comes from blood-train.csv.

import gradio as gr
import pandas as pd

def predict(name, blood_group, city):
    """Look up hospitals in ``city`` that have ``blood_group`` on record.

    Reads ``blood-train.csv`` from the working directory on every call.

    Args:
        name: Requester's name; accepted from the form but not used here.
        blood_group: Value compared against the ``Blood_group`` column.
        city: Value compared against the ``City`` column.

    Returns:
        A list of ``Hospital_name`` values for the matching rows, in file
        order, or the string "No available blood in your city" when no
        row matches.
    """
    records = pd.read_csv("blood-train.csv")
    wanted = (records["Blood_group"] == blood_group) & (records["City"] == city)
    matches = records[wanted]
    if matches.empty:
        return "No available blood in your city"
    return matches["Hospital_name"].tolist()

# Form components: free-text name plus two dropdowns
name_box = gr.Textbox(label="Name")
blood_group_menu = gr.Dropdown(
    label="Blood Group",
    choices=["A+", "A-", "B+", "B-", "O+", "O-", "AB+", "AB-"],
)
city_menu = gr.Dropdown(
    label="City",
    choices=[
        "Tenkasi", "Theni", "Coimbatore", "Trichy", "Dindigul", "Tiruppur",
        "Thanjavur", "Kanyakumari", "Chennai", "Thirunelveli", "Madurai",
    ],
)
result_box = gr.Textbox(label="Hospital Name")

# Wire the lookup function to the form and show its result in a text box
interface = gr.Interface(
    fn=predict,
    inputs=[name_box, blood_group_menu, city_menu],
    outputs=result_box,
)

interface.launch()


ImportError: ignored

In [None]:
import gradio as gr
import pandas as pd

def predict(name, blood_group, city):
    """List hospitals in ``city`` with ``blood_group``, busiest first.

    Reads ``blood-train.csv`` from the working directory on every call.

    Args:
        name: Requester's name; accepted from the form but not used here.
        blood_group: Value compared against the ``Blood_group`` column.
        city: Value compared against the ``City`` column.

    Returns:
        A list of ``Hospital_name`` values for the matching rows, ordered
        by ``Number of Donations`` descending, or the string
        "No available blood in your city" when no row matches.
    """
    donors = pd.read_csv("blood-train.csv")
    same_group = donors[donors["Blood_group"] == blood_group]
    local_matches = same_group[same_group["City"] == city]
    if local_matches.empty:
        return "No available blood in your city"
    ranked = local_matches.sort_values(by="Number of Donations", ascending=False)
    return ranked["Hospital_name"].tolist()

# Options offered by the two dropdowns
blood_group_choices = [
    "A+", "A-", "B+", "B-", "O+",
    "O-", "AB+", "AB-", "A1+", "A1-",
]
city_choices = [
    "Tenkasi", "Theni", "Coimbatore",
    "Trichy", "Dindigul", "Tiruppur",
    "Thanjavur", "Kanyakumari", "Chennai",
    "Thirunelveli", "Madurai",
]

# Wire the lookup function to a three-input form with a single text output
interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Name"),
        gr.Dropdown(label="Blood Group", choices=blood_group_choices),
        gr.Dropdown(label="City", choices=city_choices),
    ],
    outputs=gr.Textbox(label="Hospital Name"),
)

interface.launch()


ImportError: ignored

In [None]:
import gradio as gr
import pandas as pd

def predict(name, blood_group, city):
    """List hospitals in ``city``, preferring those with ``blood_group``.

    Reads ``blood-train.csv`` from the working directory once per call.
    If at least one row matches both the city and the blood group, only
    those rows are used. Otherwise every row for the city is used and
    "No Blood" is printed to stdout.

    Args:
        name: Requester's name; accepted from the form but not used here.
        blood_group: Value compared against the ``Blood_group`` column.
        city: Value compared against the ``City`` column.

    Returns:
        A list of ``Hospital_name`` values ordered by
        ``Number of Donations`` descending. The list is empty when the
        city has no rows at all.
    """
    donors = pd.read_csv("blood-train.csv")
    # Filter by city first so the fallback can reuse this frame instead of
    # reading the CSV a second time.
    in_city = donors[donors["City"] == city]
    candidates = in_city[in_city["Blood_group"] == blood_group]
    if len(candidates) == 0:
        # No hospital in this city has the requested group: fall back to
        # every hospital in the city.
        print("No Blood")
        candidates = in_city
    ranked = candidates.sort_values(by="Number of Donations", ascending=False)
    return ranked["Hospital_name"].tolist()

# Build the form inputs up front: a free-text name and two dropdowns
form_inputs = [
    gr.Textbox(label="Name"),
    gr.Dropdown(
        label="Blood Group",
        choices=["A+", "A-", "B+", "B-", "O+", "O-", "AB+", "AB-", "A1+", "A1-"],
    ),
    gr.Dropdown(
        label="City",
        choices=[
            "Tenkasi", "Theni", "Coimbatore", "Trichy",
            "Dindigul", "Tiruppur", "Thanjavur", "Kanyakumari",
            "Chennai", "Thirunelveli", "Madurai",
        ],
    ),
]
form_output = gr.Textbox(label="Hospital Name")

# Wire the lookup function to the form and show its result in a text box
interface = gr.Interface(fn=predict, inputs=form_inputs, outputs=form_output)

interface.launch()
