In [7]:
import pandas as pd
import xgboost as xgb
from google.cloud import bigquery
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import os


# Configuration (make these configurable!)
PROJECT_ID = "optimum-time-448801-t4" # Your Project ID
DATASET_ID = "gcp_hack_dataset"
TABLE_ID = "cleaned_g2016_mlb_homeruns"
MODEL_FILENAME = "homerun_prediction.xgbmodel"
MODEL_PATH = os.path.join(os.getcwd(), MODEL_FILENAME) # Or use a cloud storage location

def train_and_save_model():
    try:
        client = bigquery.Client(project=PROJECT_ID)
        query = f"SELECT ExitVelocity, LaunchAngle, HitDistance FROM `{DATASET_ID}.{TABLE_ID}`"
        df = client.query(query).to_dataframe()

        X = df[['ExitVelocity', 'LaunchAngle']]
        y = df['HitDistance']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) #Added random_state for reproducibility

        model = xgb.XGBRegressor()
        model.fit(X_train, y_train)

        # Make predictions
        y_pred = model.predict(X_test)

        # Evaluate the model
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        print(f"Mean Squared Error: {mse}")
        print(f"R-squared: {r2}")

        model.save_model(MODEL_PATH)
        print(f"Model saved to: {MODEL_PATH}")

    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    train_and_save_model()



Mean Squared Error: 381.7017687615608
R-squared: 0.39436409964412955
Model saved to: /home/jupyter/homerun_prediction.xgbmodel




In [2]:
import pandas as pd
from surprise import SVD
from google.cloud import bigquery
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split

# Load fan interaction data
client = bigquery.Client()
query = "SELECT user_id, favorite_team_id FROM `gcp_hack_dataset.2025-mlb-fan-favs-follows`"
df = client.query(query).to_dataframe()

# Convert to Surprise format
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(df[['user_id', 'favorite_team_id']], reader)

# Train the model
trainset, testset = train_test_split(data, test_size=0.2)
model = SVD()
model.fit(trainset)

# Save model
import pickle
pickle.dump(model, open("recommendation_model.pkl", "wb"))


ValueError: not enough values to unpack (expected 3, got 2)

In [None]:
import pandas as pd
from google.cloud import bigquery  #Or appropriate library for your chosen method
#Import necessary libraries for your chosen recommendation algorithm (surprise, implicit, etc.)

#BigQuery Configuration
PROJECT_ID = "optimum-time-448801-t4"
DATASET_ID = "gcp_hack_dataset"
TABLE_ID = "your_table_name"


def get_data_from_bigquery():
    """Retrieves data from BigQuery."""
    try:
        client = bigquery.Client(project=PROJECT_ID)
        query = f"SELECT user_id, favorite_team_id FROM `{DATASET_ID}.{TABLE_ID}`"
        df = client.query(query).to_dataframe()
        return df
    except Exception as e:
        print(f"Error retrieving data from BigQuery: {e}")
        return None

def train_and_recommend(df):
    """Trains the recommendation model and generates recommendations."""
    #This section depends heavily on your chosen method (surprise, implicit, BigQuery ML, etc.)
    #It will involve data preprocessing, model training, and recommendation generation.
    #Here's a placeholder:
    #... Your recommendation model training and generation logic goes here ...
    return recommendations #Return the recommendations in a suitable format

if __name__ == "__main__":
    df = get_data_from_bigquery()
    if df is not None:
        recommendations = train_and_recommend(df)
        print(recommendations) #Print the generated recommendations.



In [None]:
import tensorflow as tf
from google.cloud import storage
import pandas as pd

# Load training data
df = pd.read_csv("gs://your-bucket-name/game_predictions.csv")

# Define model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Classification Output
])

# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model.fit(df.drop('play_outcome', axis=1), df['play_outcome'], epochs=10, batch_size=32)
