In [1]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Locate the dataset directory. It can be overridden with the OSIC_DATA_DIR
# environment variable so the notebook is not tied to a single machine; the
# fallback is the original local path.
dir_path = os.environ.get('OSIC_DATA_DIR', 'D:/osic-pulmonary-fibrosis-progression')
print('Contents of Dataset:' + str(os.listdir(dir_path)))

# Read the tabular train/test files; os.path.join avoids hand-built separators.
datadf = pd.read_csv(os.path.join(dir_path, 'train.csv'))
testdf = pd.read_csv(os.path.join(dir_path, 'test.csv'))

# Preview the first rows of the training frame.
datadf.head()

Contents of Dataset:['sample_submission.csv', 'test', 'test.csv', 'train', 'train.csv']


Unnamed: 0,Patient,Weeks,FVC,Percent,Age,Sex,SmokingStatus
0,ID00007637202177411956430,-4,2315,58.253649,79,Male,Ex-smoker
1,ID00007637202177411956430,5,2214,55.712129,79,Male,Ex-smoker
2,ID00007637202177411956430,7,2061,51.862104,79,Male,Ex-smoker
3,ID00007637202177411956430,9,2144,53.950679,79,Male,Ex-smoker
4,ID00007637202177411956430,11,2069,52.063412,79,Male,Ex-smoker


In [3]:
# Report the (rows, columns) shape of the train and test frames.
print(f"Shape of Train Data: {datadf.shape}")
print(f'Shape of Test Data: {testdf.shape}')

Shape of Train Data: (1549, 7)
Shape of Test Data: (5, 7)


In [4]:
# Number of distinct patient IDs present in the training data.
dataN = datadf.Patient.nunique()
print(f'Number of Patients in data: {dataN}')

Number of Patients in data: 176


In [5]:
# One row per (Patient, Age, Sex, SmokingStatus) combination, with 'freq'
# holding how many rows of datadf belong to that combination.
statdf = (
    datadf
    .groupby(['Patient', 'Age', 'Sex', 'SmokingStatus'])['Patient']
    .count()
    .rename('freq')
    # The patient key is renamed to 'id' so it does not clash with the counted
    # 'Patient' column when the index is turned back into columns.
    .rename_axis(['id', 'Age', 'Sex', 'SmokingStatus'])
    .reset_index()
)
statdf.head()

Unnamed: 0,id,Age,Sex,SmokingStatus,freq
0,ID00007637202177411956430,79,Male,Ex-smoker,9
1,ID00009637202177434476278,69,Male,Ex-smoker,9
2,ID00010637202177584971671,60,Male,Ex-smoker,9
3,ID00011637202177653955184,72,Male,Ex-smoker,9
4,ID00012637202177665765362,65,Male,Never smoked,9


In [226]:
# Bar chart of the number of recorded rows per patient, with bars ordered by
# their total so the distribution of counts is easy to read.
fig = px.bar(statdf, x='id', y='freq', color='freq')
fig.update_layout(
    title='Number of times FVC checked',
    xaxis={'categoryorder': 'total ascending', 'title': 'Patients'},
    yaxis={'title': 'FVC entries'},
    width=1000,
)
# The x axis carries one long patient ID per bar; hide the tick labels.
fig.update_xaxes(showticklabels=False)
fig.show()

In [7]:
# Age distribution of the patient groups in statdf, split by sex.
fig = px.histogram(statdf, x='Age', color='Sex')
fig.update_layout(title='Number of Patients w.r.t Age', width=1000)
# Outline the bars so adjacent bins are visually separated.
fig.update_traces(marker_line_color='black', marker_line_width=1)
fig.show()

In [248]:
# Count of statdf rows per sex.
fig = px.histogram(statdf, x='Sex')
fig.update_layout(title='Number of Patients wrt Sex', width=1000)
# Black 1px outline around each bar.
fig.update_traces(marker={'line': {'color': 'black', 'width': 1}})
fig.show()

In [85]:
# Count of statdf rows per smoking status, split by sex.
fig = px.histogram(statdf, x='SmokingStatus', color='Sex')
fig.update_layout(title='Number of Patients w.r.t Smoking Status', width=1000)
# Black 1.5px outline around each bar.
fig.update_traces(marker={'line': {'color': 'black', 'width': 1.5}})
fig.show()

In [254]:
# Distribution of the 'Weeks' column across all rows of datadf, one violin
# per sex.
fig = px.violin(datadf, y='Weeks', color='Sex')
fig.update_layout(
    title='Visits to the Hospital w.r.t Weeks',
    width=1000,
    yaxis_title='Week',
)
fig.show()

In [8]:
# Every FVC measurement in datadf plotted against its 'Weeks' value.
fig = px.scatter(datadf, x='Weeks', y='FVC')
fig.update_layout(title='FVC Values w.r.t Weeks', width=1000)
fig.show()

In [255]:
# Horizontal violin of all FVC values, with an embedded box plot and every
# individual point drawn alongside.
fig = px.violin(datadf, x='FVC', box=True, points='all')
fig.update_layout(title='FVC Spread', width=1000)
# Black 1.5px outline on the point markers.
fig.update_traces(marker={'line': {'color': 'black', 'width': 1.5}})
fig.show()

In [166]:
# Pearson correlation between the numeric attributes only. datadf also holds
# string columns (Patient, Sex, SmokingStatus); pandas >= 2.0 raises on those
# instead of silently dropping them, so the numeric columns are selected
# explicitly. select_dtypes is used rather than corr(numeric_only=True)
# because that keyword is not available on older pandas versions.
corrmat = datadf.select_dtypes(include='number').corr(method='pearson')

fig = px.imshow(
    corrmat,
    title='Pearson Correlation between attributes',
    color_continuous_scale='RdYlBu_r',
    origin='lower',
)
fig.show()

In [243]:
# Compare the FVC trajectory of three hand-picked patients.
# NOTE(review): the legend labels are hard-coded per patient ID — confirm each
# one matches that patient's SmokingStatus in datadf.
patient1 = datadf[datadf.Patient == 'ID00007637202177411956430']
patient2 = datadf[datadf.Patient == 'ID00012637202177665765362']
patient3 = datadf[datadf.Patient == 'ID00082637202201836229724']

fig = go.Figure()

# One line+marker trace per patient; the traces differ only in data and label.
for label, visits in (
    ('Ex-smoker', patient1),
    ('Never Smoked', patient2),
    ('Smoker', patient3),
):
    fig.add_trace(go.Scatter(
        x=visits['Weeks'],
        y=visits['FVC'],
        mode='lines+markers',
        name=label,
        marker={'size': 12},
    ))

# Title, axis labels and width, matching the other figures in this notebook.
fig.update_layout(
    title='FVC over Weeks for Three Sample Patients',
    xaxis_title='Weeks',
    yaxis_title='FVC',
    width=1000,
)
fig.show()