# Data Exploration for Student Admission Forecasting

This notebook performs exploratory data analysis (EDA) on the admissions data. It uses visualizations and statistical summaries to reveal trends and patterns in the data.

In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's white-grid theme for the plots that follow
sns.set(style='whitegrid')

# Read the processed admissions CSV into a DataFrame
data_path = '../data/processed/admissions_processed.csv'
admissions_data = pd.read_csv(filepath_or_buffer=data_path)

# Preview the first five rows (the default for head)
admissions_data.head(n=5)

In [None]:
# Descriptive statistics for every column, numeric and non-numeric alike
summary_stats = admissions_data.describe(include='all')
summary_stats

In [None]:
# Count the null entries in each column ...
missing_values = admissions_data.isna().sum()
# ... and show only the columns that actually contain some
missing_values.loc[missing_values.gt(0)]

In [None]:
# Visualize the distribution of admission applications over time.
# Index the data by application timestamp. The guard keeps this cell
# re-runnable: after the first run 'created_at' is the index rather than a
# column, so repeating the conversion unguarded would raise a KeyError.
if admissions_data.index.name != 'created_at':
    admissions_data['created_at'] = pd.to_datetime(admissions_data['created_at'])
    admissions_data.set_index('created_at', inplace=True)

# Resample the data to get daily counts
daily_admissions = admissions_data.resample('D').size()

# Plot the daily admissions
plt.figure(figsize=(14, 7))
plt.plot(daily_admissions, marker='o', linestyle='-')
plt.title('Daily Admissions Over Time')
plt.xlabel('Date')
plt.ylabel('Number of Admissions')
plt.xticks(rotation=45)
# Pass True explicitly: a bare plt.grid() toggles the grid, which switches
# it OFF when the active style (seaborn 'whitegrid') already enables it.
plt.grid(True)
plt.show()

In [None]:
# Visualize the daily admission counts for July and August 2023.
# Partial-string slicing on the datetime index selects both whole months.
july_august_admissions = daily_admissions.loc['2023-07':'2023-08']

plt.figure(figsize=(14, 7))
plt.bar(july_august_admissions.index, july_august_admissions.values, color='skyblue')
plt.title('Total Admissions for July and August 2023')
plt.xlabel('Date')
plt.ylabel('Number of Admissions')
plt.xticks(rotation=45)
# Show horizontal grid lines only. The visibility is set explicitly because
# plt.grid(axis='y') without a visibility argument toggles the current state,
# which hides the y-grid when the active style already enables grid lines.
plt.grid(False, axis='x')
plt.grid(True, axis='y')
plt.show()