# In [None]:
# Import Libraries for both projects
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, mean_squared_error, r2_score

# Glass Identification Project
# Trains a random-forest classifier to predict 'Type of glass' from the
# remaining columns, prints evaluation metrics and plots feature importances.

# Load the dataset.
# The raw-content host is used on purpose: a github.com ".../blob/..." link
# serves the HTML viewer page, not the CSV file itself.
glass_url = 'https://raw.githubusercontent.com/FlipRoboTechnologies/ML-Datasets/main/Glass%20Identification/Glass%20Identification.csv'
glass_data = pd.read_csv(glass_url)

# Explore the dataset
print(glass_data.head())
print(glass_data.describe())
# info() writes its report itself and returns None, so it is not wrapped in print().
glass_data.info()

# Data Preprocessing: report the number of missing values per column
print(glass_data.isnull().sum())

# Split the data into features and target
X_glass = glass_data.drop(columns=['Type of glass', 'Id number'])
y_glass = glass_data['Type of glass']

# Train-Test Split.
# Splitting happens BEFORE scaling so that the scaler's mean/std are learned
# from the training rows only and no test-set statistics leak into training.
X_glass_train_raw, X_glass_test_raw, y_glass_train, y_glass_test = train_test_split(
    X_glass, y_glass, test_size=0.2, random_state=42
)

# Standardize the features (fit on train, apply the same transform to test)
scaler = StandardScaler()
X_glass_train = scaler.fit_transform(X_glass_train_raw)
X_glass_test = scaler.transform(X_glass_test_raw)
# Kept for any later code that expects the fully scaled feature matrix.
X_glass_scaled = scaler.transform(X_glass)

# Model Training
glass_model = RandomForestClassifier(n_estimators=100, random_state=42)
glass_model.fit(X_glass_train, y_glass_train)

# Model Evaluation
y_glass_pred = glass_model.predict(X_glass_test)
print(confusion_matrix(y_glass_test, y_glass_pred))
print(classification_report(y_glass_test, y_glass_pred))
print(f'Accuracy: {accuracy_score(y_glass_test, y_glass_pred) * 100:.2f}%')

# Visualization: one horizontal bar per feature, sized by its importance
feature_importances = glass_model.feature_importances_
features = X_glass.columns

plt.figure(figsize=(10, 6))
sns.barplot(x=feature_importances, y=features)
plt.xlabel('Feature Importance')
plt.ylabel('Feature')
plt.title('Feature Importances for Glass Classification')
plt.show()

# Student Grades Prediction Project
# Fits a linear regression that predicts 'CGPA' from the remaining columns,
# prints MSE / R^2 and plots actual vs. predicted values.

# Load the dataset.
# The raw-content host is used on purpose: a github.com ".../blob/..." link
# serves the HTML viewer page, not the CSV file itself.
grades_url = 'https://raw.githubusercontent.com/FlipRoboTechnologies/ML-Datasets/main/Grades/Grades.csv'
grades_data = pd.read_csv(grades_url)

# Explore the dataset
print(grades_data.head())
print(grades_data.describe())
# info() writes its report itself and returns None, so it is not wrapped in print().
grades_data.info()

# Data Preprocessing: report the number of missing values per column
print(grades_data.isnull().sum())

# Split the data into features and target
X_grades = grades_data.drop(columns=['CGPA', 'Seat No'])
y_grades = grades_data['CGPA']

# StandardScaler and LinearRegression need numeric input. One-hot encode any
# non-numeric columns; numeric columns pass through get_dummies unchanged.
# NOTE(review): assumes non-numeric columns are categorical (e.g. letter
# grades) - confirm against the dataset, an ordinal mapping may fit better.
X_grades_encoded = pd.get_dummies(X_grades, dtype=float)

# Train-Test Split.
# Splitting happens BEFORE scaling so that the scaler's mean/std are learned
# from the training rows only and no test-set statistics leak into training.
X_grades_train_raw, X_grades_test_raw, y_grades_train, y_grades_test = train_test_split(
    X_grades_encoded, y_grades, test_size=0.2, random_state=42
)

# Standardize the features with a dedicated scaler, so that a scaler fitted
# for another dataset is not silently re-fitted and overwritten here.
grades_scaler = StandardScaler()
X_grades_train = grades_scaler.fit_transform(X_grades_train_raw)
X_grades_test = grades_scaler.transform(X_grades_test_raw)
# Kept for any later code that expects the fully scaled feature matrix.
X_grades_scaled = grades_scaler.transform(X_grades_encoded)

# Model Training
grades_model = LinearRegression()
grades_model.fit(X_grades_train, y_grades_train)

# Model Evaluation
y_grades_pred = grades_model.predict(X_grades_test)
print(f'Mean Squared Error: {mean_squared_error(y_grades_test, y_grades_pred):.2f}')
print(f'R^2 Score: {r2_score(y_grades_test, y_grades_pred):.2f}')

# Visualization: each point is one test row (x = actual, y = predicted)
plt.figure(figsize=(10, 6))
plt.scatter(y_grades_test, y_grades_pred)
plt.xlabel('Actual CGPA')
plt.ylabel('Predicted CGPA')
plt.title('Actual vs Predicted CGPA')
plt.show()
