In [1]:
import pandas as pd

##  Reading Data

In [2]:
# Load the raw dataset from the CSV file next to the notebook.
df = pd.read_csv(filepath_or_buffer='data.csv')

In [3]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,fever,bodyPain,age,runnyNose,diffBreathe,infectionProb
0,99.998462,0,8,1,1,1
1,99.890802,0,24,1,0,1
2,98.516719,1,37,0,0,1
3,99.70819,0,16,0,-1,1
4,101.881519,0,86,1,0,1


In [4]:
# Preview the last rows of the loaded frame.
df.tail()

Unnamed: 0,fever,bodyPain,age,runnyNose,diffBreathe,infectionProb
2494,99.262841,0,36,1,-1,1
2495,100.6775,0,26,1,1,0
2496,98.746268,1,76,0,-1,1
2497,101.825053,1,26,1,1,1
2498,99.66239,1,44,0,0,1


In [5]:
# Summarise column dtypes, non-null counts and memory usage.
# (No arguments: the previous stray `()` was being passed as `verbose`.)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2499 entries, 0 to 2498
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   fever          2499 non-null   float64
 1   bodyPain       2499 non-null   int64  
 2   age            2499 non-null   int64  
 3   runnyNose      2499 non-null   int64  
 4   diffBreathe    2499 non-null   int64  
 5   infectionProb  2499 non-null   int64  
dtypes: float64(1), int64(5)
memory usage: 117.3 KB


In [6]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per column.
df.describe()

Unnamed: 0,fever,bodyPain,age,runnyNose,diffBreathe,infectionProb
count,2499.0,2499.0,2499.0,2499.0,2499.0,2499.0
mean,100.002984,0.506202,50.839136,0.518607,0.003601,0.495398
std,1.146241,0.500062,29.235696,0.499754,0.827124,0.500079
min,98.002381,0.0,1.0,0.0,-1.0,0.0
25%,98.986062,0.0,26.0,0.0,-1.0,0.0
50%,100.00626,1.0,51.0,1.0,0.0,0.0
75%,100.986713,1.0,76.0,1.0,1.0,1.0
max,101.999859,1.0,100.0,1.0,1.0,1.0


## Train Test Splitting

In [7]:
import numpy as np

In [8]:
def data_split(data, ratio, seed=42):
    """Randomly partition ``data`` into a training and a test subset.

    Rows are shuffled with a private, seeded generator, so the split is
    reproducible and the process-wide ``np.random`` state is left untouched.
    With the default seed the shuffle order is the same one that
    ``np.random.seed(42); np.random.permutation(len(data))`` produces.

    Parameters
    ----------
    data : object supporting ``len()`` and ``.iloc`` (e.g. a pandas DataFrame)
        The rows to split.
    ratio : float
        Fraction of rows, between 0 and 1 inclusive, assigned to the test
        subset. The test size is ``int(len(data) * ratio)`` (truncated).
    seed : int, optional
        Seed for the shuffle. Defaults to 42.

    Returns
    -------
    tuple
        ``(train, test)``, both selected from ``data`` via ``.iloc``.

    Raises
    ------
    ValueError
        If ``ratio`` is not within ``[0, 1]`` (this also rejects NaN).
    """
    # A negative or >1 ratio would otherwise slice the permutation in a
    # surprising way and silently return a nonsensical split.
    if not 0 <= ratio <= 1:
        raise ValueError(
            "ratio must be between 0 and 1, got {!r}".format(ratio)
        )

    # Local generator: reproducible without reseeding the global NumPy RNG.
    rng = np.random.RandomState(seed)
    shuffled = rng.permutation(len(data))

    test_set_size = int(len(data) * ratio)
    test_indices = shuffled[:test_set_size]
    train_indices = shuffled[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

    
    

In [9]:
# Hold out 20% of the rows for testing; the rest is used for training.
train, test = data_split(data=df, ratio=0.2)

In [10]:
# Display the test partition returned by data_split.
test

Unnamed: 0,fever,bodyPain,age,runnyNose,diffBreathe,infectionProb
2319,100.364359,1,59,1,-1,1
1865,98.069523,1,92,1,0,1
902,99.549880,1,66,0,1,0
2240,99.090542,0,88,0,-1,1
1285,98.959761,0,52,1,0,0
...,...,...,...,...,...,...
1037,101.960252,0,55,1,1,0
2054,98.029331,1,33,0,0,0
1860,98.357603,0,1,0,-1,1
1862,99.914910,0,1,1,1,1


In [11]:
# Display the training partition returned by data_split.
train

Unnamed: 0,fever,bodyPain,age,runnyNose,diffBreathe,infectionProb
461,98.736635,1,41,0,1,0
109,98.336465,1,48,1,0,1
2296,98.580702,1,77,1,-1,1
354,100.656121,1,60,1,-1,1
266,100.826554,0,95,0,-1,1
...,...,...,...,...,...,...
1638,98.279667,0,44,0,-1,0
1095,99.019134,0,88,0,-1,1
1130,101.815312,1,55,0,-1,0
1294,98.830159,0,2,1,1,0


In [12]:
# Model inputs: every column except the `infectionProb` target. The list is
# defined once so the train and test matrices cannot drift apart.
FEATURE_COLUMNS = ['fever', 'bodyPain', 'age', 'runnyNose', 'diffBreathe']
X_train = train[FEATURE_COLUMNS].to_numpy()
X_test = test[FEATURE_COLUMNS].to_numpy()

In [13]:
# Inspect the training feature matrix (one row per sample, one column per feature).
X_train

array([[ 98.73663529,   1.        ,  41.        ,   0.        ,
          1.        ],
       [ 98.3364649 ,   1.        ,  48.        ,   1.        ,
          0.        ],
       [ 98.58070233,   1.        ,  77.        ,   1.        ,
         -1.        ],
       ...,
       [101.8153118 ,   1.        ,  55.        ,   0.        ,
         -1.        ],
       [ 98.83015895,   0.        ,   2.        ,   1.        ,
          1.        ],
       [101.1268726 ,   0.        ,  58.        ,   0.        ,
          1.        ]])

In [14]:
# Selecting the column by name (not a one-element list) yields a Series, so
# to_numpy() is already 1-D and no row count has to be hard-coded.
Y_train = train['infectionProb'].to_numpy()
Y_test = test['infectionProb'].to_numpy()

In [15]:
# Inspect the training labels taken from the `infectionProb` column.
Y_train

array([0, 1, 1, ..., 0, 0, 1], dtype=int64)

In [16]:
# Inspect the test labels taken from the `infectionProb` column.
Y_test

array([1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0,
       0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1,
       1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
       1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1,
       0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1,
       1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0,
       0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1,

In [17]:
from sklearn.linear_model import LogisticRegression

In [18]:
# Logistic-regression classifier with the library's default settings,
# fitted on the training features and labels.
clf = LogisticRegression()
clf.fit(X=X_train, y=Y_train)

LogisticRegression()

In [19]:
# One hand-written sample, in the same column order used to build X_train:
# fever, bodyPain, age, runnyNose, diffBreathe.
# NOTE(review): the runnyNose slot holds -1, but the describe() output above
# shows runnyNose ranging from 0 to 1 while diffBreathe ranges from -1 to 1.
# The last two values may be swapped; confirm the intended input.
inputFeatures = [
    100,  # fever
    1,    # bodyPain
    22,   # age
    -1,   # runnyNose
    1,    # diffBreathe
]
# Row 0 is the only sample; column 1 is the probability of the second class
# (label 1 for this 0/1 target).
infProb = clf.predict_proba([inputFeatures])[0, 1]

In [20]:
# Show the predicted probability computed for the sample above.
infProb

0.4449636612250586