# In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# World Happiness Report Project
# Step 1: Load the Dataset
# The CSV is fetched over the network straight into a DataFrame.
happiness_url = 'https://github.com/FlipRoboTechnologies/ML-Datasets/blob/main/World%20Happiness/happiness_score_dataset.csv?raw=true'
happiness_df = pd.read_csv(happiness_url)

# Display the first few rows of the dataset
print(happiness_df.head())

# Step 2: Explore the Dataset
# Display dataset information.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit an extra "None" line.
happiness_df.info()

# Display statistical summary
print(happiness_df.describe())

# Step 3: Preprocess the Data
# Report how many missing entries each column contains
print(happiness_df.isna().sum())

# Discard, in place, every row that has at least one missing value
happiness_df.dropna(axis=0, how='any', inplace=True)

# Step 4: Feature Selection and Engineering
# Column the regression model will predict
target = 'Happiness Score'

# Predictor columns fed to the model.
# NOTE(review): 'Dystopia Residual' combined with the other six factors may
# add up to the Happiness Score itself in this dataset, which would make the
# fit trivially near-perfect -- confirm before interpreting the metrics.
features = [
    'Economy (GDP per Capita)',
    'Family',
    'Health (Life Expectancy)',
    'Freedom',
    'Generosity',
    'Trust (Government Corruption)',
    'Dystopia Residual',
]

# Feature matrix (X) and target vector (y)
X = happiness_df.loc[:, features]
y = happiness_df.loc[:, target]

# Step 5: Split the Data
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 6: Train a Regression Model
# fit() returns the estimator itself, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)

# Predict the target for the held-out rows
y_pred = model.predict(X_test)

# Score the predictions against the held-out targets
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

# Titanic Survived Project
# Step 1: Load the Dataset
# The CSV is fetched over the network straight into a DataFrame.
titanic_url = 'https://github.com/FlipRoboTechnologies/ML-Datasets/blob/main/Titanic/titanic_train.csv?raw=true'
titanic_df = pd.read_csv(titanic_url)

# Display the first few rows of the dataset
print(titanic_df.head())

# Step 2: Explore the Dataset
# Display dataset information.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit an extra "None" line.
titanic_df.info()

# Display statistical summary
print(titanic_df.describe())

# Step 3: Preprocess the Data
# Check for missing values
print(titanic_df.isnull().sum())

# Fill missing values: the median for 'Age', the most frequent value for 'Embarked'.
# The filled column is assigned back to the frame. Calling
# fillna(inplace=True) on `titanic_df[col]` is a chained in-place operation on
# a column selection: pandas 2.x warns about it, and with Copy-on-Write
# enabled it leaves `titanic_df` unchanged, so the NaNs would survive.
titanic_df['Age'] = titanic_df['Age'].fillna(titanic_df['Age'].median())
titanic_df['Embarked'] = titanic_df['Embarked'].fillna(titanic_df['Embarked'].mode()[0])

# Drop the 'Cabin' column as it has too many missing values
titanic_df.drop(columns=['Cabin'], inplace=True)

# Encode categorical variables as dummy columns; drop_first=True omits the
# first level of each encoded column. The feature list below expects the
# remaining dummies to be named 'Sex_male', 'Embarked_Q' and 'Embarked_S'.
titanic_df = pd.get_dummies(titanic_df, columns=['Sex', 'Embarked'], drop_first=True)

# Step 4: Feature Selection and Engineering
# Select features and target variable
features = ['Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'Sex_male', 'Embarked_Q', 'Embarked_S']
target = 'Survived'

# Feature matrix (X) and target vector (y)
X = titanic_df[features]
y = titanic_df[target]

# Step 5: Split the Data
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 6: Train a Classification Model
# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict the class label for the held-out rows
y_pred = model.predict(X_test)

# Score the predictions against the held-out labels
class_report = classification_report(y_true=y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f'Accuracy: {accuracy}')
print('Confusion Matrix:')
print(conf_matrix)
print('Classification Report:')
print(class_report)
