In [1]:
# --- 1. Load Libraries and Prepare Original Data ---
import pandas as pd
import requests
import xgboost as xgb
from sklearn.model_selection import train_test_split
import joblib

# Read the two semicolon-delimited course tables.
mat_df = pd.read_csv('student-mat.csv', sep=';')
por_df = pd.read_csv('student-por.csv', sep=';')

# Columns present in both tables; rows are matched on all of them at once.
merge_keys = [
    "school", "sex", "age", "address", "famsize", "Pstatus",
    "Medu", "Fedu", "Mjob", "Fjob", "reason", "nursery", "internet",
]
# Outer join keeps rows that appear in only one table; every other shared
# column is duplicated with a '_math' / '_por' suffix.
df_merged = mat_df.merge(por_df, how='outer', on=merge_keys, suffixes=('_math', '_por'))

# Collapse each suffixed pair into a single column: take the math value and
# fall back to the Portuguese value where the math one is missing.
df_merged = df_merged.assign(**{
    base: df_merged[base + '_math'].fillna(df_merged[base + '_por'])
    for base in ('G1', 'G2', 'G3', 'absences', 'failures')
})
# Target label: 1 when the consolidated G3 is below 10, otherwise 0.
df_merged['at_risk'] = df_merged['G3'].lt(10).astype(int)

# Columns of the modelling frame: the merge keys, the consolidated features,
# and the label as the last column.
final_columns = merge_keys + [
    'traveltime', 'studytime', 'failures', 'schoolsup', 'famsup', 'paid',
    'activities', 'higher', 'romantic', 'famrel', 'freetime', 'goout',
    'Dalc', 'Walc', 'health', 'absences', 'at_risk',
]
# Start from the key columns, then append every non-key, non-label feature
# using the same math-first / Portuguese-fallback rule as above.
final_df = df_merged[merge_keys].assign(**{
    feature: df_merged[feature + '_math'].fillna(df_merged[feature + '_por'])
    for feature in final_columns
    if feature not in merge_keys and feature != 'at_risk'
})
final_df['at_risk'] = df_merged['at_risk']


# --- 2. Enrich Data with Weather API ---
def get_bad_weather_days(latitude, longitude, start_date, end_date, threshold_mm=10, timeout=30):
    """Count days with significant rainfall using the Open-Meteo archive API.

    Parameters
    ----------
    latitude, longitude : float
        Coordinates forwarded to the API unchanged.
    start_date, end_date : str
        Date range forwarded to the API unchanged (this notebook passes
        "YYYY-MM-DD" strings).
    threshold_mm : float, default 10
        A day is counted when its ``rain_sum`` is strictly greater than
        this value.
    timeout : float, default 30
        Seconds ``requests.get`` may wait before giving up.

    Returns
    -------
    int
        Number of days whose ``rain_sum`` exceeds ``threshold_mm``. Returns 0
        when the response carries no usable ``daily`` / ``rain_sum`` data
        (for example an error payload).

    Raises
    ------
    Whatever ``requests.get`` or ``response.json`` raise (timeout, connection
    failure, non-JSON body) is propagated to the caller.
    """
    API_URL = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": latitude, "longitude": longitude,
        "start_date": start_date, "end_date": end_date,
        "daily": "rain_sum", "timezone": "Europe/London"
    }
    # Without an explicit timeout a stalled connection blocks the kernel forever.
    response = requests.get(API_URL, params=params, timeout=timeout)
    data = response.json()

    # Best-effort contract: any payload without daily rain data counts as zero days.
    daily = data.get('daily') if isinstance(data, dict) else None
    if not daily or 'rain_sum' not in daily:
        return 0

    # Missing days arrive as null; coerce them to NaN so the comparison below
    # never sees None (an all-null column would otherwise raise TypeError).
    rain_sum = pd.to_numeric(pd.Series(daily['rain_sum'], dtype='object'), errors='coerce')
    # NaN compares False, so days without data are simply not counted.
    return int((rain_sum > threshold_mm).sum())

# Coordinates and date range handed to the weather lookup.
# NOTE(review): confirm these match where and when the student data was collected.
school_lat, school_lon = 38.52, -8.01
school_year_start, school_year_end = "2015-09-01", "2016-06-30"
num_bad_days = get_bad_weather_days(
    latitude=school_lat,
    longitude=school_lon,
    start_date=school_year_start,
    end_date=school_year_end,
)

# Scalar assignment: every row of final_df receives the same count.
final_df['bad_weather_days'] = num_bad_days

# Report the result and preview a handful of columns from the enriched frame.
print("Data enrichment complete!")
print(f"Number of heavy rain days added to each student's record: {num_bad_days}")
print("\nFirst 5 rows of the new, enriched dataset:")
preview = final_df.loc[:, ['age', 'failures', 'absences', 'address', 'bad_weather_days', 'at_risk']]
print(preview.head())

ModuleNotFoundError: No module named 'xgboost'

In [2]:
pip install pandas xgboost scikit-learn joblib requests

Note: you may need to restart the kernel to use updated packages.


ERROR: Invalid requirement: 'joblib\xa0requests': Expected end or semicolon (after name and no valid version specifier)
    joblib requests
          ^


In [3]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install xgboost

Collecting xgboost
  Using cached xgboost-3.0.5-py3-none-win_amd64.whl.metadata (2.1 kB)
Using cached xgboost-3.0.5-py3-none-win_amd64.whl (56.8 MB)
Installing collected packages: xgboost
Successfully installed xgboost-3.0.5
Note: you may need to restart the kernel to use updated packages.


In [5]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [6]:
pip install joblib

Note: you may need to restart the kernel to use updated packages.


In [1]:
# --- 1. Load Libraries and Prepare Original Data ---
import pandas as pd
import requests
import xgboost as xgb
from sklearn.model_selection import train_test_split
import joblib

# Read the two semicolon-delimited course tables.
mat_df = pd.read_csv('student-mat.csv', sep=';')
por_df = pd.read_csv('student-por.csv', sep=';')

# Columns present in both tables; rows are matched on all of them at once.
merge_keys = [
    "school", "sex", "age", "address", "famsize", "Pstatus",
    "Medu", "Fedu", "Mjob", "Fjob", "reason", "nursery", "internet",
]
# Outer join keeps rows that appear in only one table; every other shared
# column is duplicated with a '_math' / '_por' suffix.
df_merged = mat_df.merge(por_df, how='outer', on=merge_keys, suffixes=('_math', '_por'))

# Collapse each suffixed pair into a single column: take the math value and
# fall back to the Portuguese value where the math one is missing.
df_merged = df_merged.assign(**{
    base: df_merged[base + '_math'].fillna(df_merged[base + '_por'])
    for base in ('G1', 'G2', 'G3', 'absences', 'failures')
})
# Target label: 1 when the consolidated G3 is below 10, otherwise 0.
df_merged['at_risk'] = df_merged['G3'].lt(10).astype(int)

# Columns of the modelling frame: the merge keys, the consolidated features,
# and the label as the last column.
final_columns = merge_keys + [
    'traveltime', 'studytime', 'failures', 'schoolsup', 'famsup', 'paid',
    'activities', 'higher', 'romantic', 'famrel', 'freetime', 'goout',
    'Dalc', 'Walc', 'health', 'absences', 'at_risk',
]
# Start from the key columns, then append every non-key, non-label feature
# using the same math-first / Portuguese-fallback rule as above.
final_df = df_merged[merge_keys].assign(**{
    feature: df_merged[feature + '_math'].fillna(df_merged[feature + '_por'])
    for feature in final_columns
    if feature not in merge_keys and feature != 'at_risk'
})
final_df['at_risk'] = df_merged['at_risk']


# --- 2. Enrich Data with Weather API ---
def get_bad_weather_days(latitude, longitude, start_date, end_date, threshold_mm=10, timeout=30):
    """Count days with significant rainfall using the Open-Meteo archive API.

    Parameters
    ----------
    latitude, longitude : float
        Coordinates forwarded to the API unchanged.
    start_date, end_date : str
        Date range forwarded to the API unchanged (this notebook passes
        "YYYY-MM-DD" strings).
    threshold_mm : float, default 10
        A day is counted when its ``rain_sum`` is strictly greater than
        this value.
    timeout : float, default 30
        Seconds ``requests.get`` may wait before giving up.

    Returns
    -------
    int
        Number of days whose ``rain_sum`` exceeds ``threshold_mm``. Returns 0
        when the response carries no usable ``daily`` / ``rain_sum`` data
        (for example an error payload).

    Raises
    ------
    Whatever ``requests.get`` or ``response.json`` raise (timeout, connection
    failure, non-JSON body) is propagated to the caller.
    """
    API_URL = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": latitude, "longitude": longitude,
        "start_date": start_date, "end_date": end_date,
        "daily": "rain_sum", "timezone": "Europe/London"
    }
    # Without an explicit timeout a stalled connection blocks the kernel forever.
    response = requests.get(API_URL, params=params, timeout=timeout)
    data = response.json()

    # Best-effort contract: any payload without daily rain data counts as zero days.
    daily = data.get('daily') if isinstance(data, dict) else None
    if not daily or 'rain_sum' not in daily:
        return 0

    # Missing days arrive as null; coerce them to NaN so the comparison below
    # never sees None (an all-null column would otherwise raise TypeError).
    rain_sum = pd.to_numeric(pd.Series(daily['rain_sum'], dtype='object'), errors='coerce')
    # NaN compares False, so days without data are simply not counted.
    return int((rain_sum > threshold_mm).sum())

# Coordinates and date range handed to the weather lookup.
# NOTE(review): confirm these match where and when the student data was collected.
school_lat, school_lon = 38.52, -8.01
school_year_start, school_year_end = "2015-09-01", "2016-06-30"
num_bad_days = get_bad_weather_days(
    latitude=school_lat,
    longitude=school_lon,
    start_date=school_year_start,
    end_date=school_year_end,
)

# Scalar assignment: every row of final_df receives the same count.
final_df['bad_weather_days'] = num_bad_days

# Report the result and preview a handful of columns from the enriched frame.
print("Data enrichment complete!")
print(f"Number of heavy rain days added to each student's record: {num_bad_days}")
print("\nFirst 5 rows of the new, enriched dataset:")
preview = final_df.loc[:, ['age', 'failures', 'absences', 'address', 'bad_weather_days', 'at_risk']]
print(preview.head())

FileNotFoundError: [Errno 2] No such file or directory: 'student-mat.csv'