In [None]:
import pandas as pd
import os
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from google.colab import drive

# Colab-only setup: (re)mount the whole Google Drive at the mount point below.
mount_point = "/content/drive"
drive.mount(mount_point, force_remount=True)

# Make the project folder on Drive the working directory, so the relative
# paths used afterwards (the CSV that is read, the PNG figures that are saved)
# resolve inside it.
project_dir = "/content/drive/My Drive/DMAV/"
os.chdir(project_dir)


In [None]:
# Load the dataset; the relative file name is resolved against the current
# working directory.
df = pd.read_csv("oasis_longitudinal.csv")

In [None]:
# Preview the first five rows of the loaded frame.
df.head()

In [None]:
# Summarise column names, dtypes and non-null counts to spot missing values.
df.info()

In [None]:
# Index the rows by their 'MRI ID' value.
# Guarded so the cell can be re-run safely: once 'MRI ID' has become the index
# it is no longer a column, and calling set_index('MRI ID') a second time would
# raise KeyError.
if df.index.name != 'MRI ID':
    df = df.set_index('MRI ID')
df.head()

**Cleansing**

In [None]:
# Boolean Series: True for each row whose column values repeat an earlier row
# (the first occurrence stays False).
duplicate_mask = df.duplicated(keep="first")
duplicate_mask

To see which rows are duplicated, use the result of `df.duplicated()` as a boolean mask on the DataFrame.

In [None]:
# Show only the rows flagged as duplicates by the boolean mask.
df.loc[df.duplicated()]

In [None]:
# Keep the first occurrence of every fully duplicated row and discard the rest;
# the original `df` is left untouched.
df_clean = df.drop_duplicates(keep="first")

In [None]:
# Re-check row count, dtypes and non-null counts after removing duplicate rows.
df_clean.info()  

In [None]:
# Rows whose index label repeats an earlier one; an empty result means every
# index label in df_clean is unique.
df_clean.loc[df_clean.index.duplicated()]

# Dropping irrelevant columns




In [None]:
# Columns treated as irrelevant for the analysis that follows.
dropped_columns = ['Subject ID', 'Hand', 'MR Delay', 'Visit', 'MMSE']

# NOTE: this rebinds `df` to the de-duplicated, trimmed frame; the plotting
# cells that follow all read from `df`.
df = df_clean.drop(columns=dropped_columns)

df.head()

**Visualisation**

**Density plot of normalized Whole Brain Volume (nWBV) by Clinical Dementia Rating (CDR)**

In [None]:

# One filled nWBV density curve per CDR level, all drawn on a single wide axis.
# FacetGrid creates and owns its own figure, so no separate plt.figure() call is
# made here (an extra one would only render as an empty figure).
facet = sns.FacetGrid(df, hue="CDR", aspect=3)

# `fill=True` is the current spelling of the deprecated `shade=True`.
facet.map(sns.kdeplot, 'nWBV', fill=True)
facet.set(xlim=(0.5, 1))

# The grid's single axis is the current axis, so the pyplot calls label it.
plt.xlabel('normal Whole Brain Volume (nWBV)', size=12)
plt.ylabel('Density', size=12)
plt.title('Density plot of Clinical Dementia Rate(CDR) by normal Whole Brain Volume (nWBV) ', size=16)

# Legend keyed by the hue variable (CDR), then write the figure to disk.
facet.add_legend()
plt.savefig('nWBV_CDR.png')




**Age v/s nWBV of the patient**

In [None]:

# Scatter of nWBV against Age with seaborn's fitted regression line drawn in red.
fig, ax = plt.subplots(figsize=(12, 12))
sns.regplot(data=df, x="Age", y="nWBV", line_kws={"color": "red"}, ax=ax)

ax.set_xlabel('Age', size=12)
ax.set_ylabel('normal Whole Brain Volume (nWBV)', size=12)
ax.set_title('Negative Correlation b/w Age and normal Whole Brain Volume (nWBV) ', size=20)

# Remove the top and right spines (these are despine's defaults).
sns.despine()
fig.savefig('Age_nWBV.png')






**Gender v/s nWBV of the patient**

In [None]:

# Overlay one nWBV density curve per gender on a shared axis.

# Distinct Gender values, ordered by how often they occur.
gender_count = df['Gender'].value_counts()
gender_indicies = list(gender_count.index)

fig, ax = plt.subplots(figsize=(12, 10))

# Draw one unfilled density curve per gender category.
for g in gender_indicies:
    # Rows belonging to this gender category.
    subset = df[df['Gender'] == g]
    # `fill=False` is the current spelling of the deprecated `shade=False`.
    sns.kdeplot(subset['nWBV'], label=g, fill=False, alpha=0.5, ax=ax)

# Remove the top and right spines (these are despine's defaults).
sns.despine()

# Identify the curves with a legend built from the labels above, instead of
# text placed at hard-coded data coordinates with hard-coded category names:
# those annotations drift off the curves whenever the data changes.
ax.legend(title='Gender')

# Label the plot.
ax.set_xlabel('normal Whole Brain Volume (nWBV)', size=12)
ax.set_ylabel('Density', size=12)
ax.set_title('Density Plot of normal Whole Brain Volume (nWBV)  by Gender ', size=20)
fig.savefig('Gender_nWBV.png')
  



**Education vs nWBV of the patient**

In [None]:
# Strip plot of nWBV for each EDUC value, with a small horizontal jitter so
# overlapping points stay distinguishable.
fig, ax = plt.subplots(figsize=(12, 12))
sns.stripplot(x="EDUC", y="nWBV", data=df, jitter=0.05, ax=ax)

# Remove the top and right spines (these are despine's defaults).
sns.despine()

ax.set(xlabel="Years of Education", ylabel="normal Whole Brain Volume (nWBV)")
# Single title only: a leftover "... by Gender" title copied from another plot
# used to be set first and was immediately overwritten by this one.
ax.set_title('Years of Education v/s normal Whole Brain Volume (nWBV)', size=20)
fig.savefig('EDUC_nWBV.png')

**Socioeconomic Status (SES) vs nWBV of the patient**

In [None]:
# Strip plot of nWBV for each SES value, with a small horizontal jitter so
# overlapping points stay distinguishable.
fig, ax = plt.subplots(figsize=(12, 12))
sns.stripplot(data=df, x="SES", y="nWBV", jitter=0.05, ax=ax)

ax.set_xlabel("Social Economy Status")
ax.set_ylabel("normal Whole Brain Volume (nWBV)")
ax.set_title('Social Economy Status v/s normal Whole Brain Volume (nWBV)', size=20)

# Remove the top and right spines (these are despine's defaults).
sns.despine()
fig.savefig('SES_nWBV.png')