In [1]:
import pandas as pd

# Reading Data

In [2]:
# Load the dataset; the path is resolved relative to the working directory.
data_path = 'data.csv'
df = pd.read_csv(data_path)

In [3]:
# Preview the first rows of the loaded frame (head() defaults to five rows).
df.head()

Unnamed: 0,fever,headache,age,cough,breathing,tiredness,probability
0,101,0,96,1,1,0,1
1,100,0,28,1,0,0,0
2,103,0,98,0,0,0,1
3,98,1,22,1,1,1,1
4,100,1,92,0,0,0,1


In [4]:
# Preview the last rows of the loaded frame (tail() defaults to five rows).
df.tail()

Unnamed: 0,fever,headache,age,cough,breathing,tiredness,probability
655,100,1,40,0,1,1,1
656,104,1,58,0,1,0,1
657,102,0,52,0,1,1,0
658,99,0,94,0,1,0,1
659,104,1,26,1,1,1,1


In [5]:
# Print the column names, non-null counts and dtypes to check for missing data.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660 entries, 0 to 659
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   fever        660 non-null    int64
 1   headache     660 non-null    int64
 2   age          660 non-null    int64
 3   cough        660 non-null    int64
 4   breathing    660 non-null    int64
 5   tiredness    660 non-null    int64
 6   probability  660 non-null    int64
dtypes: int64(7)
memory usage: 36.2 KB


In [6]:
# Count how many rows carry each distinct value of the 'fever' column.
df['fever'].value_counts()

100    113
101     98
102     96
104     92
99      90
98      87
103     84
Name: fever, dtype: int64

In [7]:
# Count how many rows carry each distinct value of the 'age' column.
df['age'].value_counts()

54    13
7     13
75    13
90    12
69    11
      ..
6      3
61     2
93     2
25     2
3      1
Name: age, Length: 100, dtype: int64

In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
df.describe()

Unnamed: 0,fever,headache,age,cough,breathing,tiredness,probability
count,660.0,660.0,660.0,660.0,660.0,660.0,660.0
mean,100.978788,0.481818,51.275758,0.478788,0.533333,0.478788,0.477273
std,1.953831,0.500048,28.55285,0.499929,0.499266,0.499929,0.499862
min,98.0,0.0,1.0,0.0,0.0,0.0,0.0
25%,99.0,0.0,27.75,0.0,0.0,0.0,0.0
50%,101.0,0.0,52.5,0.0,1.0,0.0,0.0
75%,103.0,1.0,75.0,1.0,1.0,1.0,1.0
max,104.0,1.0,100.0,1.0,1.0,1.0,1.0


# Train/test split

In [9]:
import numpy as np

In [10]:
def data_split(data, ratio, seed=42):
    """Shuffle ``data`` reproducibly and split it into train and test parts.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to split. Anything supporting ``len()`` and positional ``.iloc``
        indexing works.
    ratio : float
        Fraction of the rows, within [0, 1], that goes to the test set. The
        test size is ``int(len(data) * ratio)``, i.e. it is rounded down.
    seed : int or None, optional
        Seed for the shuffle. The default (42) gives the same permutation as
        ``np.random.seed(42)`` followed by ``np.random.permutation``. Pass
        ``None`` for a non-reproducible shuffle.

    Returns
    -------
    tuple
        ``(train, test)``: two selections of ``data`` that together contain
        every row exactly once. Original index labels are preserved.

    Raises
    ------
    ValueError
        If ``ratio`` lies outside [0, 1].
    """
    if not 0 <= ratio <= 1:
        raise ValueError(f"ratio must be within [0, 1], got {ratio!r}")

    # A private RandomState keeps the split reproducible without reseeding the
    # global NumPy generator, which would silently affect unrelated code.
    rng = np.random.RandomState(seed)
    shuffled = rng.permutation(len(data))

    test_set_size = int(len(data) * ratio)
    test_indices = shuffled[:test_set_size]
    train_indices = shuffled[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

In [11]:
# Scratch demo of np.random.permutation: a shuffled arrangement of 0..6.
# No seed is set in this cell, so the order shown depends on the global RNG state.
np.random.permutation(7)

array([3, 1, 2, 6, 4, 0, 5])

In [12]:
# Hold out 20% of the rows as the test set; the rest becomes the training set.
train, test = data_split(data=df, ratio=0.2)

In [13]:
# Display the training split; row labels are the original (now shuffled) index values.
train

Unnamed: 0,fever,headache,age,cough,breathing,tiredness,probability
18,102,0,42,1,0,1,0
363,104,1,26,0,1,0,0
597,103,0,67,1,1,1,0
541,101,1,33,1,1,1,0
61,103,1,60,0,1,0,1
...,...,...,...,...,...,...,...
71,98,1,96,1,1,0,0
106,99,0,27,0,0,0,0
270,103,1,81,0,0,1,0
435,100,0,22,1,0,1,1


In [14]:
# Display the held-out test split; row labels are the original (now shuffled) index values.
test

Unnamed: 0,fever,headache,age,cough,breathing,tiredness,probability
629,101,1,63,1,0,0,1
499,98,0,77,1,0,1,1
135,101,1,79,1,1,1,0
480,101,0,75,0,1,0,0
90,101,0,95,0,1,1,0
...,...,...,...,...,...,...,...
77,101,1,42,1,1,1,0
530,100,1,18,1,1,0,1
407,102,1,2,0,0,1,1
234,102,0,62,0,0,0,1


In [15]:
# Keep only the six predictor columns of the training split (the label is excluded).
X_train = train.loc[:, ['fever', 'headache', 'age', 'cough', 'breathing', 'tiredness']]

In [16]:
# Display the training feature frame to confirm the column selection.
X_train

Unnamed: 0,fever,headache,age,cough,breathing,tiredness
18,102,0,42,1,0,1
363,104,1,26,0,1,0
597,103,0,67,1,1,1
541,101,1,33,1,1,1
61,103,1,60,0,1,0
...,...,...,...,...,...,...
71,98,1,96,1,1,0
106,99,0,27,0,0,0
270,103,1,81,0,0,1
435,100,0,22,1,0,1


In [17]:
# Keep only the six predictor columns of the test split (the label is excluded).
X_test = test.loc[:, ['fever', 'headache', 'age', 'cough', 'breathing', 'tiredness']]

In [18]:
# Display the test feature frame to confirm the column selection.
X_test

Unnamed: 0,fever,headache,age,cough,breathing,tiredness
629,101,1,63,1,0,0
499,98,0,77,1,0,1
135,101,1,79,1,1,1
480,101,0,75,0,1,0
90,101,0,95,0,1,1
...,...,...,...,...,...,...
77,101,1,42,1,1,1
530,100,1,18,1,1,0
407,102,1,2,0,0,1
234,102,0,62,0,0,0


In [19]:
# Inspect the training features as a plain NumPy array (result is not stored).
X_train.to_numpy()

array([[102,   0,  42,   1,   0,   1],
       [104,   1,  26,   0,   1,   0],
       [103,   0,  67,   1,   1,   1],
       ...,
       [103,   1,  81,   0,   0,   1],
       [100,   0,  22,   1,   0,   1],
       [102,   1,  36,   0,   0,   1]], dtype=int64)

In [20]:
# NOTE(review): repeats the previous cell verbatim; candidate for removal.
X_train.to_numpy()

array([[102,   0,  42,   1,   0,   1],
       [104,   1,  26,   0,   1,   0],
       [103,   0,  67,   1,   1,   1],
       ...,
       [103,   1,  81,   0,   0,   1],
       [100,   0,  22,   1,   0,   1],
       [102,   1,  36,   0,   0,   1]], dtype=int64)

In [21]:
# Predictor columns, in the order the model receives them.
feature_columns = ['fever', 'headache', 'age', 'cough', 'breathing', 'tiredness']

# Convert both splits to NumPy arrays for scikit-learn.
X_train = train[feature_columns].to_numpy()
# The test features must come from the held-out `test` frame; taking them from
# `train` would duplicate the training features and mismatch Y_test in length.
X_test = test[feature_columns].to_numpy()

In [33]:
# Selecting the label column as a Series makes to_numpy() return a 1-D array
# directly, so no hard-coded split sizes are needed and the cell keeps working
# when the dataset size or the test ratio changes.
Y_train = train['probability'].to_numpy()
Y_test = test['probability'].to_numpy()

In [25]:
# Display the training labels (values of the 'probability' column).
Y_train

array([0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1,
       0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
       0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,
       1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1,

In [34]:
# Display the test labels (values of the 'probability' column).
Y_test

array([1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0,
       1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
       0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0,
       0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0],
      dtype=int64)

In [27]:
from sklearn.linear_model import LogisticRegression

In [29]:
# Fit a logistic-regression classifier with default hyper-parameters
# on the training features and labels.
clf = LogisticRegression()
clf.fit(X=X_train, y=Y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [37]:
# One sample, ordered as fever, headache, age, cough, breathing, tiredness.
inputFeatures = [101, 0, 64, 1, 0, 0]

# predict_proba expects a 2-D input, hence the wrapping list. It returns one
# row per sample with one column per class in clf.classes_ order.
class_probabilities = clf.predict_proba([inputFeatures])

# Row 0 is our single sample; column 1 is the second class (label 1 for 0/1 labels).
infprob = class_probabilities[0][1]

In [38]:
# Show the predicted probability of class 1 for the sample above.
infprob

0.568564175650476