In [None]:
import pandas as pd # type: ignore
from sklearn.metrics import accuracy_score # type: ignore
from sklearn.model_selection import train_test_split # type: ignore
from sklearn.preprocessing import StandardScaler # type: ignore
import tensorflow as tf # type: ignore
from tensorflow.keras.layers import Dense, Dropout # type: ignore
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.optimizers import Adam # type: ignore

# Load the stats table and drop sparse columns: with thresh=100 a column is
# kept only if it has at least 100 non-missing values.
df = pd.read_csv('ncaa_stats.csv')
df.dropna(axis=1, thresh=100, inplace=True)

# Every column except the identifiers and the label is a model input.
feature_columns = [col for col in df.columns if col not in ['Team', 'Year', 'Winner/Target']]

# Rows labelled 2 are the ones to score; every other row is used for
# training and evaluation.
# NOTE(review): presumably 2 marks the undecided current season and the
# remaining labels are 0/1 (the model ends in a sigmoid) -- confirm in the CSV.
# NOTE(review): dropna above only removes sparse *columns*; any NaN left in
# the feature rows will reach the network unchanged -- verify the data.
past_data = df[df['Winner/Target'] != 2]
current_year_data = df[df['Winner/Target'] == 2].copy()

X = past_data[feature_columns]
y = past_data['Winner/Target']

# Split BEFORE scaling so the scaler's mean/variance are learned from the
# training rows only. Fitting on the full matrix first would leak held-out
# statistics into preprocessing and inflate the reported accuracy.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so that any later code referring to X_scaled still finds it; it is now
# produced by the train-fitted scaler.
X_scaled = scaler.transform(X)

# Small fully connected binary classifier: two ReLU hidden layers and a
# single sigmoid output unit.
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
# The held-out split is passed as validation_data only to monitor progress
# per epoch; no callback here acts on it.
model.fit(X_train, y_train, epochs=100, batch_size=16, validation_data=(X_test, y_test), verbose=1)

# predict() returns one column per output unit; flatten to 1-D before
# thresholding at 0.5 so the predictions line up with y_test.
y_pred = (model.predict(X_test).ravel() > 0.5).astype(int)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")

# Score the label-2 rows with the same (train-fitted) scaler and model.
current_X_scaled = scaler.transform(current_year_data[feature_columns])
current_year_data['Raw_Probability'] = model.predict(current_X_scaled).ravel()

# Normalise within each Year so the percentages of one year's rows sum to 100.
yearly_total = current_year_data.groupby('Year')['Raw_Probability'].transform('sum')
current_year_data['Win_Probability'] = (current_year_data['Raw_Probability'] / yearly_total) * 100

# Show every row, ordered by year and then by descending win probability.
# `display` is supplied by the IPython/Jupyter session.
pd.set_option('display.max_rows', 10000)
display(current_year_data[['Year', 'Team', 'Win_Probability']].sort_values(['Year', 'Win_Probability'], ascending=[True, False]))