# In [None]:
# Olympics Data Analyzer (Pandas + NumPy + Matplotlib)
# project by SHAURYA CHAWLA

# Step 1: Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Step 2: Build a small in-memory sample dataset (swap in a real CSV later).
# Each tuple below is one medal record: (Year, Athlete, Country, Sport, Medal).
_columns = ('Year', 'Athlete', 'Country', 'Sport', 'Medal')
_records = [
    (2008, 'Usain Bolt', 'Jamaica', 'Athletics', 'Gold'),
    (2008, 'Michael Phelps', 'USA', 'Swimming', 'Gold'),
    (2008, 'Yelena Isinbayeva', 'Russia', 'Pole Vault', 'Gold'),
    (2012, 'Mo Farah', 'UK', 'Athletics', 'Gold'),
    (2012, 'Serena Williams', 'USA', 'Tennis', 'Gold'),
    (2016, 'Simone Biles', 'USA', 'Gymnastics', 'Gold'),
    (2016, 'Katie Ledecky', 'USA', 'Swimming', 'Gold'),
    (2016, 'Usain Bolt', 'Jamaica', 'Athletics', 'Gold'),
    (2020, 'Neeraj Chopra', 'India', 'Javelin', 'Gold'),
    (2020, 'Caeleb Dressel', 'USA', 'Swimming', 'Gold'),
]

# Transpose the row-wise records into the column -> values mapping that
# pd.DataFrame consumes; the column order follows _columns.
data = {name: list(values) for name, values in zip(_columns, zip(*_records))}

df = pd.DataFrame(data)
print("🏅 Sample Olympics Dataset:\n")
print(df)


# Medal tally by country: each row of df is one medal, so counting how often
# a country appears gives its total.
print("\n🥇 Total Medals by Country:\n")
country_tally = df['Country'].value_counts()
print(country_tally)

# Bar chart of the same tally; axis labels are set on the Axes the plot returns.
tally_ax = country_tally.plot.bar(color='gold', title="Medal Tally by Country")
tally_ax.set_ylabel("Number of Medals")
tally_ax.set_xlabel("Country")
plt.show()


# Top athletes: rank athletes by how many medal rows they have and show
# at most the first five entries of that ranking.
print("\n👑 Top Athletes by Medal Count:\n")
athlete_ranking = df['Athlete'].value_counts()
print(athlete_ranking.iloc[:5])


# Average medals per Olympics: count the medal entries recorded for each
# Games year, then take the mean of those per-year counts.
medals_each_games = df.groupby('Year')['Medal'].count()
avg_medals = medals_each_games.mean()
print("\n Average medals per Olympic Games:", round(avg_medals, 2))


# Medals by sport: number of medal rows recorded for each sport.
print("\n🏋️ Medals by Sport:\n")
sport_tally = df['Sport'].value_counts()
print(sport_tally)

# Pie chart of each sport's share, annotated with percentages to one decimal.
sport_ax = sport_tally.plot.pie(autopct='%1.1f%%', figsize=(6, 6), title="Medals by Sport")
# Blank out the y-axis label so only the title and wedge labels are shown.
sport_ax.set_ylabel("")
plt.show()


# Trend analysis: medal count for every (Country, Year) pair present in df.
print("\n📈 Medals by Country and Year:\n")
medals_by_country_year = df.groupby(['Country', 'Year'])['Medal'].count()
print(medals_by_country_year)

# Reshape for plotting: one row per year, one column per country, with 0
# filled in where a country has no medal rows for that year.
pivot = df.pivot_table(
    index='Year',
    columns='Country',
    values='Medal',
    aggfunc='count',
    fill_value=0,
)

# Line chart: one line per country across the years.
trend_ax = pivot.plot.line(marker='o', title="Medal Trend Over Years")
trend_ax.set_ylabel("Number of Medals")
plt.show()


# Saving results to CSV: write the rows of df to disk without the index column.
# NOTE(review): the file holds the dataset in df, not the tallies computed
# above, although the message below calls it "Analysis" - confirm this is intended.
output_file = "olympics_analysis.csv"
df.to_csv(output_file, index=False)
print("\n Analysis saved to olympics_analysis.csv")
