In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys

ModuleNotFoundError: No module named 'pandas'

In [None]:
# Resolve the parent of the current working directory and register it on the
# import path once (presumably so the project's `scraper` and `preprocessing`
# packages can be imported by the next cell -- confirm the repo layout).
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [None]:
from scraper.team_stats_scraper import get_team_last_matches, scrape_team_match_stats
from preprocessing.team_stats_preprocessing import preprocess_team_stats, get_team_form, aggregate_team_form, compile_team_recent_form


In [None]:
# Global plot styling for every figure in this notebook.
# Order matters: the matplotlib style sheet is applied first and the seaborn
# call runs second, so seaborn's settings are layered on top of it.
# NOTE(review): sns.set(...) presumably overrides most of what 'ggplot' sets --
# confirm which of the two looks is actually intended.
plt.style.use('ggplot')
sns.set(style="whitegrid")

In [None]:
# Analysis parameters: which clubs to compare and how many recent matches to
# request for each of them.
teams = ["Manchester City", "Arsenal", "Liverpool"]
num_matches = 7

# Accumulator for the scraped match records (filled by the scraping cell)
all_match_data = list()

In [None]:
# Scrape the most recent matches for every team and collect them in one frame.
# The accumulator is reset here so that re-running this cell on a live kernel
# does not append a second copy of every match to the previous run's results.
all_match_data = []

for team in teams:
    print(f"Scraping data for {team}...")
    team_matches = get_team_last_matches(team, num_matches)
    if team_matches:
        all_match_data.extend(team_matches)
        print(f"  Found {len(team_matches)} matches")
    else:
        # Falsy result from the scraper: nothing to add for this team
        print("  No matches found")

# One row per collected match record
match_df = pd.DataFrame(all_match_data)
print(f"\nCollected data for {len(match_df)} matches in total")

In [None]:
# Quick look at the scraped frame: column names first, then the leading rows
print("Raw data columns:", match_df.columns.tolist(), sep="\n")
match_df.head()

In [None]:
# Run the project's preprocessing step over the raw scraped matches
processed_df = preprocess_team_stats(match_df)

# Inspect the result: column names first, then the leading rows
print("Processed data columns:", processed_df.columns.tolist(), sep="\n")
processed_df.head()


In [None]:
# Compile team form data: build the per-team summary frame from the processed
# matches for the clubs listed in `teams`.
# NOTE(review): the plotting cells read the columns 'team', 'avg_gf', 'avg_ga',
# 'avg_sh', 'avg_sot', 'avg_dist', 'shot_accuracy', 'points', 'wins', 'draws'
# and 'losses' from this frame, and presumably expect one row per team --
# confirm against compile_team_recent_form.
team_form_df = compile_team_recent_form(processed_df, teams)
# Bare expression so the notebook renders the frame as the cell output
team_form_df


In [None]:
# 2x2 overview of recent team form: goals, shots, points and match results.
# Uses explicit Axes objects instead of the pyplot "current axes" state so that
# each panel's calls are unambiguous.
fig, axes = plt.subplots(2, 2, figsize=(14, 8))

# Index by team once; the bar charts below use the index for the x-axis labels
form_by_team = team_form_df.set_index('team')

# Goals for and against
ax = axes[0, 0]
form_by_team[['avg_gf', 'avg_ga']].plot(kind='bar', ax=ax)
ax.set_title('Average Goals For/Against')
ax.set_ylabel('Goals per Match')
ax.tick_params(axis='x', labelrotation=45)

# Shots and shots on target
ax = axes[0, 1]
form_by_team[['avg_sh', 'avg_sot']].plot(kind='bar', ax=ax)
ax.set_title('Average Shots and Shots on Target')
ax.set_ylabel('Shots per Match')
ax.tick_params(axis='x', labelrotation=45)

# Points bar chart, best team first
ax = axes[1, 0]
team_form_df.sort_values('points', ascending=False).plot(
    x='team', y='points', kind='bar', ax=ax, color='green')
# Take the window size from the configuration instead of hard-coding it, so the
# title stays correct when num_matches is changed
ax.set_title(f'Total Points (Last {num_matches} Matches)')
ax.set_ylabel('Points')
ax.tick_params(axis='x', labelrotation=45)

# Win/Draw/Loss stacked bar chart
ax = axes[1, 1]
form_by_team[['wins', 'draws', 'losses']].plot(
    kind='bar', stacked=True, ax=ax,
    color=['green', 'gray', 'red'])
ax.set_title('Match Results')
ax.set_ylabel('Number of Matches')
ax.tick_params(axis='x', labelrotation=45)
ax.legend(loc='upper right')

fig.tight_layout()
plt.show()


In [None]:
# Radar chart comparing the teams on a common, max-scaled axis.

# Stats shown on the radar, in the same order as `labels` below
stats = ['avg_gf', 'avg_ga', 'avg_sh', 'avg_sot', 'avg_dist',
         'shot_accuracy', 'points']

# Work on a copy so the scaling below never modifies team_form_df
stats_df = team_form_df[['team'] + stats].copy()

# Reverse avg_ga (lower is better): the team conceding least gets the largest value
stats_df['avg_ga'] = stats_df['avg_ga'].max() - stats_df['avg_ga']

# Scale each stat by its column maximum so the leading team sits at 1.0
for stat in stats:
    col_max = stats_df[stat].max()
    if col_max > 0:  # Avoid division by zero (a NaN maximum is skipped as well)
        stats_df[stat] = stats_df[stat] / col_max

# Axis labels, one per entry in `stats`
# NOTE(review): 'avg_dist' is labelled 'Distance Covered' -- confirm against the
# preprocessing module that this column is not an average shot distance.
labels = ['Goals For', 'Goals Against*', 'Shots', 'Shots on Target',
          'Distance Covered', 'Shot Accuracy', 'Points']
angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False).tolist()
angles += angles[:1]  # Close the loop

fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))

# Angular axis: one spoke per stat
ax.set_xticks(angles[:-1])
ax.set_xticklabels(labels, size=12)

# Radial grid
ax.set_rlabel_position(0)
ax.set_yticks([0.25, 0.5, 0.75])
ax.set_yticklabels(["0.25", "0.5", "0.75"], color="grey", size=10)
ax.set_ylim(0, 1)

# Plot each team
for team in teams:
    team_rows = stats_df.loc[stats_df['team'] == team, stats]
    if team_rows.empty:
        # The scraping step tolerates teams without matches; skip them here
        # instead of failing with an IndexError on .iloc[0]
        print(f"Skipping {team}: no form data available")
        continue

    team_data = team_rows.iloc[0].tolist()
    team_data += team_data[:1]  # Close the loop

    ax.plot(angles, team_data, linewidth=2, linestyle='solid', label=team)
    ax.fill(angles, team_data, alpha=0.1)

ax.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
ax.set_title('Team Performance Comparison (Normalized)', size=20, y=1.08)
fig.text(0.5, 0.01, '* Goals Against is reversed (higher is better)', ha='center')

plt.show()


In [None]:
# Make sure the output directory is there before writing anything
os.makedirs('../outputs', exist_ok=True)

# (frame, destination CSV, confirmation message) for each artefact, in the
# order they are written: raw matches, processed matches, team form analysis
exports = [
    (match_df,
     '../outputs/team_matches_raw.csv',
     f"Saved raw match data with {len(match_df)} records to '../outputs/team_matches_raw.csv'"),
    (processed_df,
     '../outputs/team_matches_processed.csv',
     "Saved processed match data to '../outputs/team_matches_processed.csv'"),
    (team_form_df,
     '../outputs/team_form_analysis.csv',
     "Saved team form analysis to '../outputs/team_form_analysis.csv'"),
]

# Write each frame without its index and confirm it
for frame, csv_path, confirmation in exports:
    frame.to_csv(csv_path, index=False)
    print(confirmation)
