# Cleaning Data with Pandas

## Import Libraries

In [None]:
import pandas as pd

## Load the Dataset

### Student Mental Health

In [None]:
# Read the raw student mental health CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer="../datasets/student_mental_health.csv")

## Overview of the Dataset

In [None]:
# Preview the first five rows of the freshly loaded data
df.head(n=5)

## Rename Columns

In [None]:
# Lowercase every header first so the keys below match regardless of the
# capitalisation used in the CSV
df.columns = df.columns.str.lower()

# Map each verbose (lowercased) header to a short name and apply the whole
# mapping in a single rename; headers that are not present are ignored
short_names = {
    "choose your gender": "gender",
    "what is your course?": "course",
    "your current year of study": "seniority",
    "what is your cgpa?": "gpa",
    "marital status": "married",
    "do you have depression?": "depressed",
    "do you have anxiety?": "anxiety",
    "do you have panic attack?": "panic_attacks",
    "did you seek any specialist for a treatment?": "treatment",
}
df.rename(columns=short_names, inplace=True)

## View Updated Columns

In [None]:
# Preview the first five rows to check the new column names
df.head(n=5)

## Refactor Column Values

### Seniority String to Integer

In [None]:
# Lowercase the label, strip the leading "year " text and cast what is left
# to an integer (values are presumably of the form "Year 1" / "year 1" --
# verify against the dataset)
df["seniority"] = (
    df["seniority"]
    .str.lower()
    .str.replace("year ", "")
    .astype(int)
)
df.head()

### Age Column to Integer

In [None]:
# Rows without an age are dropped first because NaN cannot be cast to int;
# the remaining ages are then converted to integers
df = df.dropna(subset=["age"]).astype({"age": int})
df.head()

### Yes/No Answers to True and False

In [None]:
columns_to_convert = [
    "married",
    "depressed",
    "anxiety",
    "panic_attacks",
    "treatment",
]
# Accepted answers after trimming whitespace and lowercasing. "true"/"false"
# are included so the cell can be re-run on already-converted columns.
answer_to_bool = {"yes": True, "no": False, "true": True, "false": False}

# Convert Yes and No to True and False, one column at a time.
# A plain astype(bool) would turn every leftover value (NaN, typos, "no" in a
# different case) into True, so anything unrecognised is reported instead.
for column in columns_to_convert:
    answers = df[column].astype(str).str.strip().str.lower()
    flags = answers.map(answer_to_bool)
    unrecognised = flags.isna()
    if unrecognised.any():
        bad_values = sorted(df.loc[unrecognised, column].astype(str).unique())
        raise ValueError(
            f"Column {column!r} has values that are neither Yes nor No: "
            f"{bad_values}"
        )
    # Every value is now a real True/False, so the bool cast is lossless
    df[column] = flags.astype(bool)
df.head()

### Custom Date to Datetime

In [None]:
# Parse the 'timestamp' strings into datetime values; format='mixed' lets
# pandas infer the format of each entry individually.
# NOTE(review): with mixed formats an ambiguous date such as 8/7/2020 is read
# month-first by default -- confirm whether dayfirst=True is needed here.
parsed_timestamps = pd.to_datetime(df['timestamp'], format='mixed')
df['timestamp'] = parsed_timestamps
df.head()

## Save the Cleaned Data for Later Use

In [None]:
# Save the cleaned data to a new CSV file
from pathlib import Path

output_path = Path("../datasets/updated/student_mental_health.csv")
# to_csv does not create missing folders, so make sure the target directory
# exists before writing (no-op when it is already there)
output_path.parent.mkdir(parents=True, exist_ok=True)
# index=False keeps the DataFrame's row index out of the file
df.to_csv(output_path, index=False)