In [None]:
# Import packages

import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
# Load the students-performance CSV into a DataFrame and preview
# the first 10 rows.

DATA_PATH = '../input/students-performance-in-exams/StudentsPerformance.csv'
df = pd.read_csv(DATA_PATH)
df.head(10)

In [None]:
# Convert the column names to uppercase for greater readability.
# Later cells refer to the columns by these uppercase names.

df.columns = df.columns.str.upper()
df.columns

In [None]:
# A score of 70 or more counts as a pass.
# Add one '<SUBJECT> RESULT' column per subject, holding 'Pass' or 'Fail'
# depending on that subject's score.

for subject in ('MATH', 'READING', 'WRITING'):
    df[f'{subject} RESULT'] = np.where(df[f'{subject} SCORE'] >= 70, 'Pass', 'Fail')

In [None]:
# Add two derived columns:
#   '%'            - mean of the three subject scores, rounded to 2 decimals
#   'FINAL RESULT' - 'PASS' when that mean is at least 70, otherwise 'FAIL'

df['%'] = (df['MATH SCORE'] + df['READING SCORE'] + df['WRITING SCORE']).div(3).round(2)
# Vectorised comparison instead of a row-wise apply: same labels, no Python
# call per row, and it also works when the frame is empty.
df['FINAL RESULT'] = np.where(df['%'] >= 70, 'PASS', 'FAIL')
df.head()

In [None]:
# Print each column's non-null count and dtype to check for missing values
# and unexpected data types.

df.info()

In [None]:
# Summary statistics of the data (by default describe() covers the
# numeric columns only).

df.describe()

In [None]:
# Overview of the data: pairwise plots of the numeric columns
# (the scores and the derived '%'), coloured by gender.

sns.pairplot(df, hue="GENDER")

In [None]:
# Share of each gender within the dataset.
# normalize=True returns fractions of 1 (e.g. 0.5), not percentages.

df['GENDER'].value_counts(normalize=True)

In [None]:
# Share of each gender among the test takers, shown as a pie chart.
# The wedge sizes come from the data instead of hand-typed numbers, so the
# percentages printed on the chart always match the loaded file.

gender_counts = df['GENDER'].value_counts()
labels = tuple(str(g).capitalize() for g in gender_counts.index)
genders = gender_counts.to_numpy()
# Pin each gender to its colour so the mapping does not depend on which
# category is the most frequent; any other category falls back to grey.
gender_palette = {'female': '#5975a4', 'male': '#cc8963'}
colors = [gender_palette.get(str(g).lower(), '#999999') for g in gender_counts.index]

plt.figure(figsize=(10,7))
plt.pie(genders, colors=colors, labels=labels, autopct='%1.1f%%', wedgeprops={'edgecolor': 'black'})
plt.title("Gender of test takers")
plt.legend()

In [None]:
# Number of students per final result (PASS / FAIL), split by gender.
# Note: set_style changes seaborn's global style, so it also applies to
# every plot drawn after this cell.

plt.figure(figsize=(8, 7))
sns.set_style('darkgrid')
final_ax = sns.countplot(data=df, x='FINAL RESULT', hue='GENDER')
final_ax.set_title("Final overall results")

In [None]:
# Math results v gender: one panel per gender, counting Pass / Fail.

math_grid = sns.displot(data=df, y="MATH RESULT", hue="GENDER", col="GENDER")
# Put the heading on the figure rather than on each Axes: FacetGrid.set(title=...)
# writes the same title onto every panel and erases the per-panel
# "GENDER = ..." titles that identify the facets.
math_grid.fig.suptitle('Math results and gender', y=1.03)

In [None]:
# English reading and writing results.
# Grid of histograms: one row per gender, one column per reading result,
# with the writing result on the x axis of each panel.

colors = ['#5975a4','#cc8963']  # not passed to the plot below
sns.displot(
    data=df,
    x="WRITING RESULT",
    row="GENDER",
    col="READING RESULT",
    binwidth=3,
    height=3,
    facet_kws={"margin_titles": True},
)

In [None]:
# Overall percentage score v race/ethnicity group, split by gender.
# The bars show the '%' column aggregated per group (barplot's default
# estimator is the mean); the groups are listed explicitly to fix the order A..E.

plt.figure(figsize=(10, 7))
race_groups = ["group A", "group B", "group C", "group D", "group E"]
race_ax = sns.barplot(data=df, x='RACE/ETHNICITY', y='%', hue='GENDER', order=race_groups)
race_ax.set(title='Overall passing rate with race')

In [None]:
# Overall percentage results v parental level of education, split by gender.

plt.figure(figsize=(10,7))
# Keep the Axes returned by barplot in `ax`. Chaining .set_title() onto the
# call would bind `ax` to the title's Text object instead, and Axes methods
# such as the tick-label rotation below would then fail.
ax = sns.barplot(x="PARENTAL LEVEL OF EDUCATION", y="%", hue="GENDER", data=df, capsize=.2)
ax.set_title("Parental level of education v percentage results separated by gender")
# Tilt the category labels slightly so the long education levels do not collide.
ax.tick_params(axis='x', labelrotation=10)

In [None]:
# Test preparation course v overall percentage score, split by gender.
# Each point is one student's '%' value.

prep_grid = sns.catplot(data=df, x="TEST PREPARATION COURSE", y="%", hue="GENDER")
prep_grid.set(title="Test Prep v Percentage passing rate")

In [None]:
# Lunch status v overall percentage score, split by gender.
# Lunch status separates low income and high income households.
# Each point is one student's '%' value.

lunch_grid = sns.catplot(data=df, x="LUNCH", y="%", hue="GENDER")
lunch_grid.set(title="Lunch status v Passing percentage")