Original code by ***__Raghavender Ganesh__***.   
Updated with better documentation and code readability.

## ***__4. Implementing data mining on non-clinical data__***
### ***__Non-clinical data includes factors like diet, smoking, physical activity, etc.__***

### ***__Libraries__***

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

In [None]:
import warnings
warnings.filterwarnings('ignore')

### ***__Sample data__***

In [None]:
# Synthetic sample: every column holds `sample_size` random integers drawn
# from the half-open range [low, high) given for it below.
sample_size = 100
column_ranges = {
    "Age": (20, 80),
    "BMI": (18, 35),
    "Smoking": (0, 2),
    "Alchohol_Consumption": (0, 4),
    "Physical_Activity": (0, 4),
    "Chronic_Disease": (0, 2),
}

# Columns are generated in the order listed above.
data = {
    column: np.random.randint(low, high, sample_size)
    for column, (low, high) in column_ranges.items()
}

### ***__Loading dataset into pandas dataframe__***

In [None]:
# Build the working table: one column per key of `data`.
df = pd.DataFrame(data=data)

# Preview the first five rows.
df.head(5)

### ***__Scaling data__***

In [None]:
# Input columns used for scaling, PCA and K-Means.
# 'Chronic_Disease' is intentionally NOT listed: it is the label, so it must
# keep its original 0/1 values (they are used to colour the plots) and must
# not leak into the inputs of the PCA projection or the clustering.
features = ['Age', 'BMI', 'Smoking', 'Alchohol_Consumption', 'Physical_Activity']

# Standardise each feature column (zero mean, unit variance) and write the
# scaled values back into the same columns of df.
scaler = StandardScaler()
df[features] = scaler.fit_transform(df[features])

# Preview the scaled table.
df.head()

### ***__Pairplot__***

In [None]:
# Grid of pairwise plots over the columns of df, with the points coloured by
# the value of the Chronic_Disease column ('deep' colour palette).
sns.pairplot(data=df, hue="Chronic_Disease", palette="deep")

### ***__Correlation Matrix__***

In [None]:
# Pairwise correlations between the columns of df, drawn as an annotated
# heatmap (each cell shows its coefficient) in shades of blue.
correlation_matrix = df.corr()
sns.heatmap(correlation_matrix, annot=True, cmap="Blues")

### ***__PCA__***

In [None]:
# Project the columns listed in `features` onto two principal components so
# the data can be drawn on a 2-D scatter plot.
pca = PCA(n_components=2)
pca_data = pca.fit_transform(df[features])

# One named column per component.
pca_df = pd.DataFrame({
    'PCA1': pca_data[:, 0],
    'PCA2': pca_data[:, 1],
})

# Carry the label column over from df so the projected points can be
# coloured by it.
pca_df['Chronic_Disease'] = df['Chronic_Disease']
pca_df.head()

### ***__Scatter plot of the PCA projection__***

In [None]:
# Scatter the two principal components against each other, colouring each
# point by its Chronic_Disease value.
plt.scatter(
    x=pca_df['PCA1'],
    y=pca_df['PCA2'],
    c=pca_df['Chronic_Disease'],
)
plt.title("PCA")
plt.xlabel("PCA1")
plt.ylabel("PCA2")
plt.show()

### ***__K-Means Clustering__***

In [None]:
# Split the rows into two clusters based on the `features` columns;
# random_state is fixed so repeated runs on the same data agree.
kmeans = KMeans(n_clusters=2, random_state=0)
cluster_labels = kmeans.fit_predict(df[features])

# Store the cluster assignment on the main table, then mirror it onto the
# PCA table so the projection can be coloured by cluster.
df['Clusters'] = cluster_labels
pca_df['Clusters'] = df['Clusters']

### ***__Scatter plot of the K-Means clusters__***

In [None]:
# Scatter the two principal components against each other, colouring each
# point by the cluster K-Means assigned to it.
plt.scatter(pca_df['PCA1'], pca_df['PCA2'], c=pca_df['Clusters'])
plt.xlabel("PCA1")
plt.ylabel("PCA2")
plt.title("K-Means")
# Render the figure explicitly, as the PCA scatter cell does; without this
# the cell echoes the Text object returned by plt.title() and a plain script
# run would display nothing.
plt.show()