In [43]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from datetime import datetime

In [44]:
X_train = pd.read_csv("/Users/goutham/Documents/ML Hackathon/gdsc-vashisht-ml-hackathon-open-for-all/X_train.csv")
y_train = pd.read_csv("/Users/goutham/Documents/ML Hackathon/gdsc-vashisht-ml-hackathon-open-for-all/y_train.csv")
X_test = pd.read_csv("/Users/goutham/Documents/ML Hackathon/gdsc-vashisht-ml-hackathon-open-for-all/X_test.csv")

In [45]:
train_data = pd.concat([X_train, y_train], axis=1)

In [46]:
train_data['date'] = pd.to_datetime(train_data['date'])
train_data['day_of_week'] = train_data['date'].dt.dayofweek
train_data['month'] = train_data['date'].dt.month
train_data['year'] = train_data['date'].dt.year
X_test['date'] = pd.to_datetime(X_test['date'])
X_test['day_of_week'] = X_test['date'].dt.dayofweek
X_test['month'] = X_test['date'].dt.month
X_test['year'] = X_test['date'].dt.year

In [47]:
features = ['home_team', 'away_team', 'tournament', 'city', 'country', 'neutral', 'day_of_week', 'month', 'year']
target = 'result'

In [48]:
categorical_features = ['home_team', 'away_team', 'tournament', 'city', 'country']
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

In [49]:
classifier = RandomForestClassifier(random_state=42)


In [50]:
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', classifier)
])

In [51]:
X_train_processed, X_val_processed, y_train_processed, y_val_processed = train_test_split(train_data[features], train_data[target], test_size=0.2, random_state=42)


In [52]:
pipeline.fit(X_train_processed, y_train_processed)


In [53]:
val_predictions = pipeline.predict(X_val_processed)

In [54]:
accuracy = accuracy_score(y_val_processed, val_predictions)
print("Validation Accuracy:", accuracy)

Validation Accuracy: 0.515362457993279


In [55]:
test_predictions = pipeline.predict(X_test)

In [60]:
results_df = pd.DataFrame({'row_id': range(0, len(test_predictions)), 'result': test_predictions})

# Set 'row_id' column as index

In [61]:
results_df.set_index('row_id', inplace=True)

In [63]:
results_df.to_csv("/Users/goutham/Documents/ML Hackathon/gdsc-vashisht-ml-hackathon-open-for-all/predicted_results.csv")