## Algorithms Used for Classification
1. CART (Classification and Regression Trees)


### 1. CART (Classification and Regression Trees) - DecisionTree Classifier
- Sampling Technique - Train/Test Split (80:20)
- Classification Metrics - Accuracy
- Number of Models - 20 models 

In [None]:
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Read the Seattle weather data from disk
filename = 'D:/MSU_IIT/4th Year/ITD105/ML Using Different Algorithms/Case Studies/Case Study 1/seattle-weather.csv'
dataframe = read_csv(filename)

# Replace the 'weather' column with integer class labels
label_encoder = LabelEncoder()
dataframe['weather'] = label_encoder.fit_transform(dataframe['weather'])

# Target is the encoded 'weather' column; features are everything else except 'date'
Y = dataframe['weather']
X = dataframe.drop(columns=['weather', 'date'])

# Hold-out fraction and the seed shared by the split and every tree
test_size = 0.20
random_seed = 7

# Single 80:20 train/test split reused by all models
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=test_size, random_state=random_seed
)

# One row per model: (max_depth, min_samples_split, min_samples_leaf).
# Row order defines the "Model N" number printed below.
tree_settings = [
    (5, 2, 1),
    (10, 5, 2),
    (15, 8, 3),
    (20, 10, 4),
    (18, 6, 2),
    (25, 10, 5),
    (12, 7, 3),
    (18, 9, 4),
    (15, 5, 2),
    (20, 8, 3),
    (22, 11, 5),
    (14, 6, 2),
    (17, 8, 4),
    (23, 10, 5),
    (13, 7, 3),
    (19, 9, 4),
    (16, 5, 2),
    (21, 8, 3),
    (24, 11, 5),
    (11, 6, 2),
]

classifiers = [
    DecisionTreeClassifier(
        max_depth=depth,
        min_samples_split=split_min,
        min_samples_leaf=leaf_min,
        random_state=random_seed,
    )
    for depth, split_min, leaf_min in tree_settings
]

# Fit each tree on the training split, score it on the test split, and report accuracy
for i, clf in enumerate(classifiers, 1):
    clf.fit(X_train, Y_train)
    test_accuracy = accuracy_score(Y_test, clf.predict(X_test))
    print(f"\nModel {i}")
    print("Accuracy: %.2f%%" % (test_accuracy * 100))


## Algorithms Used for Regression
8. Random Forest

### 8. Random Forest
- Sampling Technique - K-fold Cross Validation (k=10)
- Regression Metrics - Mean Absolute Error (MAE)
- Number of Models - 20 models 

In [None]:
from pandas import read_csv
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# K-fold utilities needed by this cell (the cell header specifies 10-fold cross-validation)
from sklearn.model_selection import KFold, cross_val_score

# Load the dataset
filename = 'D:/MSU_IIT/4th Year/ITD105/ML Using Different Algorithms/Case Studies/Case Study 1/Rainfall_data.csv'
dataframe = read_csv(filename)

# Drop the 'Day' column
dataframe = dataframe.drop(columns=['Day'])

# Extract features (X) and target variable (Y)
X = dataframe.drop(columns=['Precipitation'])  # Features excluding 'Precipitation'
Y = dataframe['Precipitation']  # Target variable

# Cross-validation setup: 10 shuffled folds, seeded so every model sees the same folds
n_splits = 10
seed = 42
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=seed)

# One row per model: (n_estimators, max_depth, min_samples_split).
# min_samples_leaf is 1 for every model. Row order defines the "Model N" number printed below.
forest_settings = [
    (100, None, 2),
    (120, 8, 3),
    (140, 12, 5),
    (160, 15, 2),
    (180, 10, 4),
    (200, 18, 3),
    (220, 14, 5),
    (240, 20, 2),
    (260, 16, 4),
    (280, 22, 3),
    (300, 10, 5),
    (320, 24, 2),
    (340, 12, 4),
    (360, 26, 3),
    (380, 14, 5),
    (400, 28, 2),
    (420, 16, 4),
    (440, 30, 3),
    (460, 18, 5),
    (480, 32, 2),
]

models = [
    RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=1,
        random_state=seed,
    )
    for n_estimators, max_depth, min_samples_split in forest_settings
]

# Evaluate each model with 10-fold cross-validation and report the mean MAE across folds
for i, rgs in enumerate(models, 1):
    # The 'neg_mean_absolute_error' scorer returns negated MAE (higher is better),
    # so flip the sign to report an ordinary, non-negative MAE.
    fold_scores = cross_val_score(rgs, X, Y, cv=kfold, scoring='neg_mean_absolute_error')
    mae = -fold_scores.mean()
    print(f"\nModel {i}")
    print("MAE Score: %.2f" % (mae))


