Importing Essential Libraries

In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


Loading the Dataset - Missing values have already been handled

In [27]:
# Path to the life-expectancy CSV; kept in one named constant so it is easy to repoint.
DATA_PATH = '/content/drive/MyDrive/21WeeksOfML/Project1_Regression/Life_Expectancy_Data.csv'

# Read the CSV into the working DataFrame used by every later cell.
LifeData = pd.read_csv(DATA_PATH)

Handling Categorical Values


In [31]:
# Replace every object-dtype column of LifeData with integer codes and keep each
# fitted LabelEncoder in `label_encoders`, keyed by column name.
categorical_cols = LifeData.select_dtypes(include=['object']).columns

# This cell overwrites LifeData in place, so after one run no object-dtype columns
# remain. Re-running it would otherwise rebuild `label_encoders` as an empty dict and
# silently discard the fitted encoders. Fail loudly *before* clobbering them instead.
if len(categorical_cols) == 0:
    raise RuntimeError(
        "No object-dtype columns left to encode; reload LifeData before re-running this cell."
    )

label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    # fit_transform learns the label -> code mapping and applies it in one step.
    LifeData[col] = le.fit_transform(LifeData[col])
    label_encoders[col] = le

In [32]:
# Preview the first five rows of the (now encoded) frame.
LifeData.head(n=5)

Unnamed: 0,Country,Year,Status,Life expectancy,Adult Mortality,infant deaths,Alcohol,percentage expenditure,Hepatitis B,Measles,...,Polio,Total expenditure,Diphtheria,HIV/AIDS,GDP,Population,thinness 1-19 years,thinness 5-9 years,Income composition of resources,Schooling
0,0,2015,1,65.0,263.0,62,0.01,71.279624,65.0,1154,...,6.0,8.16,65.0,0.1,584.25921,1386542.0,17.2,17.3,0.479,10.1
1,0,2014,1,59.9,271.0,64,0.01,73.523582,62.0,492,...,58.0,8.18,62.0,0.1,612.696514,1386542.0,17.5,17.5,0.476,10.0
2,0,2013,1,59.9,268.0,66,0.01,73.219243,64.0,430,...,62.0,8.13,64.0,0.1,631.744976,1386542.0,17.7,17.7,0.47,9.9
3,0,2012,1,59.5,272.0,69,0.01,78.184215,67.0,2787,...,67.0,8.52,67.0,0.1,669.959,1386542.0,17.9,18.0,0.463,9.8
4,0,2011,1,59.2,275.0,71,0.01,7.097109,68.0,3013,...,68.0,7.87,68.0,0.1,63.537231,1386542.0,18.2,18.2,0.454,9.5


In [33]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Name of the regression target (the trailing space is part of the CSV header).
TARGET_COL = 'Life expectancy '

# 'Unnamed: 0' appears to be a row index written out when the CSV was saved
# (it reads 0, 1, 2, ... in the preview). It is not a real predictor, and the
# prediction app never supplies it, so it must not be part of the feature matrix.
# errors='ignore' keeps this cell working if the CSV is later re-saved without it.
NON_FEATURE_COLS = [TARGET_COL, 'Unnamed: 0']

# Define features and target
X = LifeData.drop(columns=NON_FEATURE_COLS, errors='ignore')
y = LifeData[TARGET_COL]

# Split into training and testing sets (fixed seed for a reproducible 80/20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model (fixed seed so the fitted forest is reproducible)
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Evaluate the model on the held-out 20%
y_pred = model.predict(X_test)
print("MAE:", mean_absolute_error(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R² Score:", r2_score(y_test, y_pred))


MAE: 1.075454081632653
RMSE: 1.658449157237826
R² Score: 0.9682642526692193


In [35]:
import joblib

# Destination files for the fitted regressor and the per-column label encoders.
model_path = "/content/drive/MyDrive/21WeeksOfML/Project1_Regression/life_expectancy_model.pkl"
encoders_path = "/content/drive/MyDrive/21WeeksOfML/Project1_Regression/label_encoders.pkl"

# Persist both artifacts; the final call's return value (a list containing the
# written filename) is what the cell displays.
joblib.dump(model, model_path)
joblib.dump(label_encoders, encoders_path)

['/content/drive/MyDrive/21WeeksOfML/Project1_Regression/label_encoders.pkl']

In [41]:
# Save this as life_expectancy_app.py
#
# Streamlit front-end for the saved life-expectancy regressor: collects one row of
# feature values from the user, encodes the categorical fields with the saved
# LabelEncoders, and shows the model's prediction.
import streamlit as st
import pandas as pd
import numpy as np
import joblib

# Load model and encoders
# NOTE: joblib.load unpickles arbitrary Python objects. Only load artifacts that
# were produced by this project; never point these paths at untrusted files.
model = joblib.load("life_expectancy_model.pkl")
label_encoders = joblib.load("label_encoders.pkl")

st.title("🌍 Life Expectancy Predictor")

# Input fields
# Offer only the countries the encoder was fitted on. A free-text box previously
# mapped any unknown or misspelled name to code 0 (i.e. the first fitted country),
# silently returning a prediction for the wrong country.
country = st.selectbox("Country", label_encoders['Country'].classes_.tolist())
year = st.number_input("Year", min_value=1900, max_value=2100, value=2015)
status = st.selectbox("Status", ["Developing", "Developed"])
adult_mortality = st.number_input("Adult Mortality")
infant_deaths = st.number_input("Infant Deaths")
alcohol = st.number_input("Alcohol Consumption")
percentage_expenditure = st.number_input("Percentage Expenditure")
hepatitis_b = st.number_input("Hepatitis B")
measles = st.number_input("Measles")
bmi = st.number_input("BMI")
under_five_deaths = st.number_input("Under-Five Deaths")
polio = st.number_input("Polio")
total_expenditure = st.number_input("Total Expenditure")
diphtheria = st.number_input("Diphtheria")
hiv_aids = st.number_input("HIV/AIDS")
gdp = st.number_input("GDP")
population = st.number_input("Population")
thinness_1_19 = st.number_input("Thinness 1-19 years")
thinness_5_9 = st.number_input("Thinness 5-9 years")
income_composition = st.number_input("Income Composition of Resources")
schooling = st.number_input("Schooling")

# Prepare input
# Keys reproduce the training CSV headers exactly, including their stray
# leading/trailing spaces, because the model matches features by name.
input_data = {
    'Country': label_encoders['Country'].transform([country])[0],
    'Year': year,
    'Status': label_encoders['Status'].transform([status])[0],
    'Adult Mortality': adult_mortality,
    'infant deaths': infant_deaths,
    'Alcohol': alcohol,
    'percentage expenditure': percentage_expenditure,
    'Hepatitis B': hepatitis_b,
    'Measles ': measles,
    ' BMI ': bmi,
    'under-five deaths ': under_five_deaths,
    'Polio': polio,
    'Total expenditure': total_expenditure,
    'Diphtheria ': diphtheria,
    ' HIV/AIDS': hiv_aids,
    'GDP': gdp,
    'Population': population,
    ' thinness  1-19 years': thinness_1_19,
    ' thinness 5-9 years': thinness_5_9,
    'Income composition of resources': income_composition,
    'Schooling': schooling
}

input_df = pd.DataFrame([input_data])

# Align the row with the exact feature names/order the model was fitted on.
# scikit-learn rejects a frame whose columns differ from the training columns, so a
# model trained with an extra column (e.g. a leftover 'Unnamed: 0' index) would
# otherwise fail at predict time. Columns the form cannot supply are filled with 0
# and reported to the user rather than hidden.
expected_features = getattr(model, "feature_names_in_", None)
missing_features = []
if expected_features is not None:
    missing_features = [name for name in expected_features if name not in input_df.columns]
    input_df = input_df.reindex(columns=list(expected_features), fill_value=0)

# Predict
if st.button("Predict Life Expectancy"):
    if missing_features:
        st.warning(
            "The model expects features this form does not collect "
            f"({', '.join(map(str, missing_features))}); they were filled with 0. "
            "Retrain the model without these columns for reliable predictions."
        )
    prediction = model.predict(input_df)[0]
    st.success(f"Predicted Life Expectancy: {prediction:.2f} years")


