<a href="https://colab.research.google.com/github/runnithan03/Advanced-SM/blob/main/Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Data Collection


In [1]:
import pandas as pd

# Location of the rugby dataset CSV; point this at the real file before running.
dataset_path = 'rugby_dataset.csv'
df = pd.read_csv(dataset_path)

# Preview the leading rows to confirm the columns loaded as expected.
preview = df.head()
print(preview)


FileNotFoundError: [Errno 2] No such file or directory: 'path_to_your_sports_dataset.csv'

Data Preprocessing and Analysis

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Fill missing values in numeric columns with that column's mean.
# numeric_only=True is required: on pandas >= 2.0, df.mean() raises TypeError
# when the frame contains non-numeric columns (e.g. a text 'match_date').
# Non-numeric columns are left untouched by this fill.
df = df.fillna(df.mean(numeric_only=True))

# Basic statistics
print(df.describe())

# Visualize key features (e.g., player performance over time).
# Work on a local, date-sorted copy so that `df` keeps its original row order
# and dtypes for the cells that follow. Parsing the dates gives a true time
# axis instead of one categorical tick per distinct string.
performance_by_date = (
    df.assign(match_date=pd.to_datetime(df['match_date']))
    .sort_values('match_date')
)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    performance_by_date['match_date'],
    performance_by_date['performance_score'],
    label='Performance Score',
)
ax.set_xlabel('Match Date')
ax.set_ylabel('Performance Score')
ax.set_title('Player Performance Over Time')
ax.legend()
plt.show()


Feature Engineering

In [None]:
# Moving average of the performance score over the five rows *preceding* each
# row. shift(1) keeps the current row's score out of its own window: that score
# is the prediction target in the modelling cell, so including it would leak
# the answer into the feature.
# The first five rows have no complete window of prior scores and are NaN.
# NOTE(review): assumes rows are already in chronological order and belong to a
# single player/team series — confirm against the dataset.
df['performance_moving_avg'] = (
    df['performance_score']
    .shift(1)
    .rolling(window=5)
    .mean()
)

# Display updated DataFrame (ten rows, so the first populated averages are visible)
print(df.head(10))


Model Building

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Define features (X) and target variable (y)
features = ['performance_moving_avg', 'opponent_strength', 'home_away_flag']  # Replace with relevant features from your dataset
target = 'performance_score'

# The rolling feature is NaN for its leading rows (incomplete window), and it
# is created after the earlier mean-fill, so those gaps are still present here.
# Drop incomplete rows instead of passing NaN to the estimator: older
# scikit-learn releases reject NaN input to RandomForestRegressor outright.
model_df = df.dropna(subset=features + [target])
if model_df.empty:
    raise ValueError(
        "No complete rows available for modelling after dropping missing "
        f"values in {features + [target]}"
    )

X = model_df[features]
y = model_df[target]

# Split data into training and testing sets (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model: mean absolute difference between actual and predicted scores
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae}")


Evaluation and Visualisation

In [None]:
# Scatter each held-out observation against the model's estimate for it.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, alpha=0.6)
ax.set_xlabel('Actual Performance')
ax.set_ylabel('Predicted Performance')
ax.set_title('Actual vs Predicted Performance')
plt.show()


UI (Optional)

In [None]:
# Save this part as app.py
import streamlit as st

# Load model (you can save the trained model and load it here if needed)
# NOTE(review): Streamlit re-executes the script on every widget interaction,
# so this refits the forest each time. TODO: persist the trained model
# (e.g. with joblib) and load it here instead.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Streamlit app
st.title('Rugby Performance Predictor')

# User input for prediction
moving_avg = st.number_input('Performance Moving Average', min_value=0.0, max_value=100.0, value=50.0)
opponent_strength = st.slider('Opponent Strength (1-10)', 1, 10, 5)
home_away_flag = st.selectbox('Home or Away', ['Home', 'Away'])

# Encode input
# NOTE(review): assumes the training data encodes home as 1 and away as 0 — confirm.
home_away_encoded = 1 if home_away_flag == 'Home' else 0

# Prediction
# Build a one-row DataFrame with the same column names the model was fitted on.
# Passing a bare NumPy array to a model fitted on a DataFrame makes scikit-learn
# warn about missing feature names and relies purely on positional order.
input_data = pd.DataFrame(
    [[moving_avg, opponent_strength, home_away_encoded]],
    columns=X_train.columns,
)
prediction = model.predict(input_data)

st.write(f'Predicted Performance Score: {prediction[0]:.2f}')
