# Statistical task

In [31]:
import pandas as pd

# Load both datasets; fields are separated by semicolons.
student_mat_df = pd.read_csv('student-mat.csv', sep=';')
# quoting=3 is csv.QUOTE_NONE: quote characters are kept as literal text instead of
# being interpreted by the parser, so they have to be removed by hand below.
student_por_df = pd.read_csv('student-por.csv', sep=';', quoting=3)

# Remove the leftover doubled quotes ("") from every text column of student_por_df.
for col in student_por_df.columns:
    if student_por_df[col].dtype == 'object':
        student_por_df[col] = student_por_df[col].str.replace('""', '', regex=False)

# Convert the grade columns G1, G2, G3 to numeric in student_por_df.
# errors='coerce' replaces values that cannot be parsed with NaN; count how many
# NaNs the conversion introduced so that data loss is reported, not silent.
for grade_col in ('G1', 'G2', 'G3'):
    grade_raw = student_por_df[grade_col]
    grade_numeric = pd.to_numeric(grade_raw, errors='coerce')
    n_coerced = int(grade_numeric.isna().sum() - grade_raw.isna().sum())
    if n_coerced:
        print(f"Warning: {n_coerced} value(s) in '{grade_col}' could not be converted and were set to NaN")
    student_por_df[grade_col] = grade_numeric

In [34]:
# Report the dimensions of each dataset: .shape is a (rows, columns) tuple.
for label, frame in (
    ('Student Mat Dataset has: ', student_mat_df),
    ('Student Por Dataset has: ', student_por_df),
):
    n_rows, n_cols = frame.shape
    print(label, n_rows, 'rows and', n_cols, 'columns')

Student Mat Dataset has:  395 rows and 33 columns
Student Por Dataset has:  650 rows and 33 columns


In [35]:
# Split the columns of student_por_df by dtype: numeric columns versus
# object/category columns (treated here as categorical features).
por_numeric = list(student_por_df.select_dtypes(include='number').columns)
por_categorical = list(student_por_df.select_dtypes(include=['object', 'category']).columns)

# Print each group as a header line (with the column count) followed by the names.
print(f"Numeric Features ({len(por_numeric)}):", por_numeric, sep="\n")
print(f"\nCategorical Features ({len(por_categorical)}):", por_categorical, sep="\n")

Numeric Features (16):
['age', 'Medu', 'Fedu', 'traveltime', 'studytime', 'failures', 'famrel', 'freetime', 'goout', 'Dalc', 'Walc', 'health', 'absences', 'G1', 'G2', 'G3']

Categorical Features (17):
['school', 'sex', 'address', 'famsize', 'Pstatus', 'Mjob', 'Fjob', 'reason', 'guardian', 'schoolsup', 'famsup', 'paid', 'activities', 'nursery', 'higher', 'internet', 'romantic']


## Result Explanation
### 1. Reading the CSV files
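The original CSV files use `;` as the field separator, and `student-por.csv` additionally contains stray doubled quotes (`""`). To load both files properly into DataFrames, I passed `sep=';'` to `read_csv`, read `student-por.csv` with quote parsing disabled (`quoting=3`), removed the leftover `""` from its text columns, and converted `G1`, `G2`, and `G3` to numeric.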

### 2. Retrieving rows and columns
I used the `.shape` attribute to find the number of rows and columns in each DataFrame:

- **shape[0]** gives the number of rows
- **shape[1]** gives the number of columns

### 3. Listing categorical and numeric features
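I used **select_dtypes()** to separate the categorical and numeric columns of the `student_por_df` dataset: `include='number'` selects the numeric columns, and `include=['object', 'category']` selects the categorical ones.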