In [None]:
import pandas as pd

# Load the diabetes dataset; the relative path is resolved against the
# kernel's working directory.
df = pd.read_csv('diabetes.csv')

# Preview the first five rows (five is also the default for head()).
df.head(n=5)

In [None]:
# Summarise the frame: column names, non-null counts, dtypes and memory usage.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

# Plotting the categorical data 

## Matplotlib - pandas



In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Count how often each Outcome value occurs, then let pandas draw one bar per value.
# (Swap .plot.bar() for .plot.pie() to get a pie chart instead.)
outcome_counts = df["Outcome"].value_counts()
ax = outcome_counts.plot.bar()

# Title and axis labels, set directly on the Axes that pandas returned.
ax.set_title('Diabetes chart')
ax.set_xlabel('Outcome')
ax.set_ylabel('Frequency')

plt.show()

## Matplotlib (only)

In [None]:
# Frequency of each distinct Outcome value, most frequent first.
counts = df.loc[:, "Outcome"].value_counts()


In [None]:
# Display the Outcome frequency table held in `counts`.
counts

In [None]:
# Draw on the current Axes: one bar per Outcome value, bar height = its count.
ax = plt.gca()
ax.bar(counts.index, counts)
plt.show()

## Seaborn 

[Visualizing the categorical data](https://seaborn.pydata.org/tutorial/categorical.html)

In [None]:
# import libraries
import seaborn as sns

# Apply seaborn's "darkgrid" theme (grey background with a grid). This changes
# the global plot style, so it also affects the figures drawn in later cells.
# set_theme() is the preferred name; sns.set() is only kept as an alias for it.
sns.set_theme(style="darkgrid")

# Bar chart of how many rows fall under each Outcome value.
# The trailing semicolon keeps the Axes repr out of the cell output.
sns.countplot(x="Outcome", data=df);

# Plotting the quantitative data

## Histograms

### Matplotlib (one column)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv('diabetes.csv')

# Histogram of a single column with Matplotlib's default binning.
# Options worth trying on plt.hist:
#   orientation='horizontal'  -> bars grow sideways
#   range=[40, 100]           -> only bin the values inside this interval
#   log=True                  -> logarithmic count axis
plt.hist(x=df['BloodPressure'])

# Render the figure.
plt.show()

### Matplotlib: all columns (first method)

In [None]:
import matplotlib.pyplot as plt

# DataFrame.hist() draws one histogram per numeric column on a single figure.
df.hist()

# Tighten the spacing on that figure so titles and tick labels do not overlap.
plt.gcf().tight_layout()
plt.show()

### Matplotlib: all columns (second method)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv('diabetes.csv')

# 3x3 grid of subplots. ravel() turns the 2-D array of Axes into a 1-D one so
# that a single running index can address every panel.
fig, axes = plt.subplots(nrows=3, ncols=3)
axes = axes.ravel()

# enumerate() pairs each column name with its position; the position picks
# the panel. The grid holds nine panels, so a frame with more than nine
# columns would run past the end of `axes`.
for idx, name in enumerate(df.columns):
    panel = axes[idx]

    # Histogram of this column, drawn by pandas on the chosen panel.
    df[name].plot.hist(ax=panel)

    # Axis labels (panel.set_title(name) would add a title as well).
    panel.set_xlabel(name)
    panel.set_ylabel('Frequency')

# Adjust the spacing so neighbouring panels do not overlap, then render.
plt.tight_layout()
plt.show()

In [None]:
# List the column labels of the DataFrame.
df.columns

## Seaborn

### Seaborn one column

In [None]:
import seaborn as sns

# Histogram of one column; seaborn picks the bins automatically.
# Options worth trying on sns.histplot:
#   bins=20         -> number of bins
#   binwidth=20     -> width of each bin
#   element='step'  -> outline style of the bars ('poly' is another choice)
sns.histplot(data=df, x="BloodPressure")
 

### Seaborn (all columns)

In [None]:
# Tip: press Shift+Tab for help on a call.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

df = pd.read_csv('diabetes.csv')

# Grid of subplots. `axes` stays two-dimensional here, so every panel is
# addressed by a (row, col) pair.
n_rows = 3
n_cols = 3
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols)

for i, column in enumerate(df.columns):
    # divmod converts the running index into (row, col), filling row by row.
    row, col = divmod(i, n_cols)
    sns.histplot(data=df[column], ax=axes[row, col])
    # To split every histogram by diabetes outcome instead, use:
    # sns.histplot(data=df, x=column, hue='Outcome', multiple="stack", ax=axes[row, col])

# Adjust layout
plt.tight_layout()

## Boxplots

### Matplotlib

## Single boxplot

In [None]:
# Box plot of a single column.
# Options worth trying on plt.boxplot:
#   showfliers=False  -> hide the outlier markers
#   notch=True        -> draw a notched box
#   showmeans=True    -> mark the mean as well as the median
plt.boxplot(x=df['BloodPressure'])

# Render the figure.
plt.show()

## One boxplot per column (pandas and matplotlib)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv('diabetes.csv')

# 3x3 grid of subplots, flattened to 1-D so one running index addresses
# every panel.
fig, axes = plt.subplots(nrows=3, ncols=3)
axes = axes.ravel()

# enumerate() pairs each column name with its position; the position picks
# the panel that receives the column's box plot.
for idx, name in enumerate(df.columns):
    df[name].plot.box(ax=axes[idx])

# Adjust the spacing so neighbouring panels do not overlap, then render.
plt.tight_layout()
plt.show()

## Seaborn

In [None]:
# Tip: press Shift+Tab for help on a call.
import math

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

df = pd.read_csv('diabetes.csv')

# Outcome is the grouping variable on the x-axis, so it is not plotted against
# itself: a box plot of Outcome grouped by Outcome carries no information.
features = [column for column in df.columns if column != 'Outcome']

# Size the grid from the number of features instead of assuming they fit 3x3.
n_cols = 3
n_rows = max(1, math.ceil(len(features) / n_cols))
# squeeze=False keeps `axes` two-dimensional even when there is a single row.
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, squeeze=False)
panels = axes.ravel()

# One panel per feature: its distribution within each Outcome group.
for panel, column in zip(panels, features):
    sns.boxplot(data=df, x='Outcome', y=column, ax=panel)

# Hide the panels left over at the end of the grid.
for panel in panels[len(features):]:
    panel.set_visible(False)

# Adjust layout
plt.tight_layout()

## Violin plots

### Matplotlib

## Single

In [None]:
# Violin plot of a single column.
# Options worth trying on plt.violinplot:
#   showmedians=True    -> mark the median
#   showmeans=True      -> mark the mean
#   showextrema=False   -> drop the min/max bars
plt.violinplot(dataset=df['BloodPressure'])

# Render the figure.
plt.show()

## One violin plot per column

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv('diabetes.csv')

# 3x3 grid of subplots, flattened to 1-D so one running index addresses
# every panel.
fig, axes = plt.subplots(nrows=3, ncols=3)
axes = axes.ravel()

# enumerate() pairs each column name with its position; the position picks
# the panel on which Matplotlib draws the column's violin plot.
for idx, name in enumerate(df.columns):
    axes[idx].violinplot(dataset=df[name])

# Adjust the spacing so neighbouring panels do not overlap, then render.
plt.tight_layout()
plt.show()

### Seaborn 

## Single Violin plot

In [None]:
# Violin plot of the fourth column (positional index 3), passed explicitly as `data`.
sns.violinplot(data=df.iloc[:, 3])

### Multiple in one plot (for comparison)

In [None]:
# Wide-form input: the first three columns are drawn as violins on one shared
# Axes, which makes their value ranges directly comparable.
sns.violinplot(data=df.iloc[:, :3])

## One violin plot per column, grouped by Outcome

In [None]:
# Tip: press Shift+Tab for help on a call.
import math

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

df = pd.read_csv('diabetes.csv')

# Outcome is the grouping variable on the x-axis, so it is not plotted against
# itself: a violin of Outcome grouped by Outcome carries no information.
features = [column for column in df.columns if column != 'Outcome']

# Size the grid from the number of features instead of assuming they fit 3x3.
n_cols = 3
n_rows = max(1, math.ceil(len(features) / n_cols))
# squeeze=False keeps `axes` two-dimensional even when there is a single row.
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, squeeze=False)
panels = axes.ravel()

# One panel per feature, with one violin per Outcome group.
# (For plain per-column plots without the split, drop x='Outcome'.)
for panel, column in zip(panels, features):
    sns.violinplot(data=df, x='Outcome', y=column, ax=panel)

# Hide the panels left over at the end of the grid.
for panel in panels[len(features):]:
    panel.set_visible(False)

# Adjust layout
plt.tight_layout()

## Correlation plot

In [None]:
import pandas as pd
import seaborn as sns

df = pd.read_csv('diabetes.csv')

# Pairwise correlations of the first five columns, drawn as a heatmap with
# the coefficient written inside every cell.
# (Use df.corr() instead to cover every column.)
corr_matrix = df.iloc[:, :5].corr()
sns.heatmap(data=corr_matrix, annot=True)

In [None]:
# Correlation matrix of the first five columns, shown as a table.
df.iloc[:, :5].corr()

## Pairplot (Seaborn)

In [None]:
# Scatter-plot matrix of the first three columns, with each column's own
# distribution on the diagonal.
# (sns.pairplot(data=df) would cover every column.)
sns.pairplot(data=df.iloc[:, :3])

## Scatter plots

## Pandas

In [None]:
import pandas as pd

# Scatter plot through the pandas plotting API: Glucose on x, Pregnancies on y.
df.plot(kind='scatter', x='Glucose', y='Pregnancies')


### Seaborn

In [None]:
import seaborn as sns

# Scatter plot of Glucose (x) against Pregnancies (y), drawn with seaborn.
# Add hue='Outcome' to colour the points by outcome.
sns.scatterplot(data=df, x="Glucose", y="Pregnancies")