**Title**: Data Wrangling 12.3 Project Milestone 5
**Author**: Ryan Weeks  
**Date**: 2/28/2025  
**Description**: I created an SQLite database to integrate my three datasets containing NFL player statistics and NFL Hall of Fame data. After cleaning the data and standardizing the column names, I loaded each dataset into its own SQL table and merged them with left joins: the Hall of Fame table is joined on player, and the API statistics table is joined on player and year. `SELECT DISTINCT` removes exact duplicate rows from the result. Keeping the tables in SQLite makes the combined data easy to query, and the joined dataset is exported as a CSV for further analysis in Tableau.

In [13]:
import sqlite3
import pandas as pd

# Build a SQLite database from three player CSVs, left-join them, and export
# the joined result as a single CSV.

# Directory that holds every input CSV and receives the joined output.
DATA_DIR = "C:\\Users\\Weekseey\\Documents\\Bellevue Work\\Data Wrangling"

# File paths (table name -> source CSV)
file_paths = {
    "matching_player_data": DATA_DIR + "\\cleaned_matching_player_data.csv",
    "hof_data": DATA_DIR + "\\hof_df_filtered.csv",
    "api_player_stats": DATA_DIR + "\\API_player_stats_expanded.csv",
}
output_path = DATA_DIR + "\\joined_players_data.csv"

# Load CSVs into DataFrames
matching_player_data = pd.read_csv(file_paths["matching_player_data"])
hof_data = pd.read_csv(file_paths["hof_data"])
api_player_stats = pd.read_csv(file_paths["api_player_stats"])

# Rename 'player_name' in the API dataset so all three tables share the
# 'player' join key.
api_player_stats = api_player_stats.rename(columns={"player_name": "player"})

# SQL query: matching_player_data is the base table; hof_data is left-joined
# on 'player' only, api_player_stats on 'player' AND 'year'.
# NOTE(review): m.player and m.year are selected explicitly and again through
# m.*, and h.* / a.* also carry their join-key columns, so the result (and the
# exported CSV header) contains repeated column names.
# NOTE(review): DISTINCT only drops rows that are identical in every selected
# column. Because hof_data is matched on player alone, more than one hof_data
# row per player would multiply the output rows - TODO verify hof_data has at
# most one row per player.
query = """
SELECT DISTINCT m.player, m.year, m.*, h.*, a.*
FROM matching_player_data m
LEFT JOIN hof_data h ON m.player = h.player
LEFT JOIN api_player_stats a ON m.player = a.player AND m.year = a.year;
"""

# Create (or open) the SQLite database. try/finally guarantees the connection
# is closed even if loading a table or running the query raises.
conn = sqlite3.connect("players_database.db")
try:
    # Load DataFrames into SQL tables, replacing any tables left by a prior run
    matching_player_data.to_sql("matching_player_data", conn, if_exists="replace", index=False)
    hof_data.to_sql("hof_data", conn, if_exists="replace", index=False)
    api_player_stats.to_sql("api_player_stats", conn, if_exists="replace", index=False)

    # Execute the join and load the result into a DataFrame
    joined_data = pd.read_sql(query, conn)
finally:
    # Close connection
    conn.close()

# Save the joined dataset as a CSV
joined_data.to_csv(output_path, index=False)