<a href="https://colab.research.google.com/github/juyounLee/AriaLucent/blob/main/powerball.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

# Parse the draw dates once, store them back, and derive the calendar year of each draw
draw_dates = pd.to_datetime(df['Draw Date'])
df['Draw Date'] = draw_dates
df['Year'] = draw_dates.dt.year

# Each 'Winning Numbers' cell is a whitespace-separated string; turn it into a list of ints
winning_numbers = df['Winning Numbers'].str.split().apply(lambda tokens: [int(token) for token in tokens])

# Flatten every draw into one long list of drawn numbers
all_numbers = []
for draw in winning_numbers:
    all_numbers.extend(draw)

# 1. Frequency Analysis of Winning Numbers
# Tally how often each number was drawn; most_common() lists them from the
# most to the least frequent, and that order is kept for the table and plot.
num_counts = Counter(all_numbers)
numbers, counts = zip(*num_counts.most_common())

# Convert to DataFrame
freq_df = pd.DataFrame({'Winning Number': numbers, 'Frequency': counts})
print(freq_df)

# Generate color palette based on frequency.
# A matplotlib colormap called with integers treats them as direct indices
# into its lookup table rather than as data values, so the raw counts are
# first rescaled to floats in [0, 1]. This makes the bar colors span the whole
# palette from the least to the most frequent number.
cmap = sns.color_palette("ch:s=.25,rot=-.20", as_cmap=True)
freq_norm = plt.Normalize(vmin=freq_df["Frequency"].min(), vmax=freq_df["Frequency"].max())
colors = cmap(freq_norm(freq_df["Frequency"].to_numpy()))

# Plot Frequency of Winning Numbers
plt.figure(figsize=(12, 6), facecolor="#78C4C5")
ax = plt.gca()
# Hide every spine except the bottom one so only the x-axis line is drawn
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.set_facecolor("#78C4C5")

# `order` pins the bars to the descending-frequency order of freq_df,
# so the i-th color in `colors` belongs to the i-th bar.
sns.barplot(x=freq_df['Winning Number'], y=freq_df['Frequency'], palette=colors, order=freq_df['Winning Number'])
plt.xlabel("Winning Numbers", color='white')
plt.ylabel("Frequency", color='white')
plt.xticks(rotation=90, color='white')
plt.yticks(color='white')
plt.show()

# 2. Most Common Winning Number Combinations
# Lists are unhashable, so each draw becomes a tuple (order as drawn) before counting
combination_counts = Counter(tuple(draw) for draw in winning_numbers)

# Keep only the ten most frequent draws as (combination, frequency) pairs
common_combinations = combination_counts.most_common(10)
combination_df = pd.DataFrame(
    data=common_combinations,
    columns=['Winning Combination', 'Frequency'],
)
print(combination_df)

# 3. Distribution of Winning Numbers Over the Years
# Start with one empty bucket per distinct year, then pour each draw's numbers into its year
yearly_numbers = {draw_year: [] for draw_year in df['Year'].unique()}
for year, nums in zip(df['Year'], winning_numbers):
    yearly_numbers[year].extend(nums)

# Flatten the buckets into long form: one (year, number) row per drawn number
year_number_rows = []
for bucket_year, bucket in yearly_numbers.items():
    year_number_rows.extend((bucket_year, value) for value in bucket)
yearly_df = pd.DataFrame(year_number_rows, columns=['Year', 'Winning Number'])
print(yearly_df.head())

# Boxplot of Winning Numbers by Year
plt.figure(figsize=(14, 6), facecolor="#78C4C5")
ax = plt.gca()
# Only the bottom spine stays visible
for side in ('right', 'top', 'left'):
    ax.spines[side].set_visible(False)
ax.set_facecolor("#78C4C5")

sns.boxplot(data=yearly_df, x='Year', y='Winning Number', palette='coolwarm')
plt.xlabel("Year", color='white')
plt.ylabel("Winning Numbers", color='white')
plt.xticks(rotation=90, color='white')
plt.yticks(color='white')
plt.show()

# 4. Analyzing the Effect of Multipliers (if applicable)
if 'Multiplier' in df.columns:
    # Two parallel lists: every drawn number, and the multiplier of the draw it came from
    multiplier_numbers = []
    multiplier_values = []

    for multiplier, nums in zip(df['Multiplier'], winning_numbers):
        multiplier_numbers += nums
        multiplier_values += [multiplier for _ in nums]

    multiplier_df = pd.DataFrame({'Multiplier': multiplier_values, 'Winning Number': multiplier_numbers})
    print(multiplier_df)

    # Boxplot: Effect of Multiplier on Winning Numbers
    plt.figure(figsize=(10, 6), facecolor="#78C4C5")
    ax = plt.gca()
    # Only the bottom spine stays visible
    for side in ('right', 'top', 'left'):
        ax.spines[side].set_visible(False)
    ax.set_facecolor("#78C4C5")

    # Multipliers are cast to strings so seaborn treats them as categories
    multiplier_labels = multiplier_df['Multiplier'].astype(str)
    sns.boxplot(x=multiplier_labels, y=multiplier_df['Winning Number'], palette='Set2')
    plt.xlabel("Multiplier", color='white')
    plt.ylabel("Winning Numbers", color='white')
    plt.xticks(color='white')
    plt.yticks(color='white')
    plt.show()

# 5. Odd vs. Even Number Distribution Analysis
def odd_even_ratio(numbers):
    """Count the odd and even values in ``numbers``.

    Despite the name, this returns counts rather than a ratio.

    Args:
        numbers: A sized iterable of integers (for example one draw).

    Returns:
        A tuple ``(odd_count, even_count)``. Every value that is not odd is
        counted as even, so the two counts always sum to ``len(numbers)``.
    """
    odd_total = 0
    for value in numbers:
        if value % 2 == 1:
            odd_total += 1
    return odd_total, len(numbers) - odd_total

# Per-draw odd/even tallies, unzipped into two parallel tuples (one entry per draw)
odd_counts, even_counts = zip(*[odd_even_ratio(nums) for nums in winning_numbers])
df['Odd Count'] = odd_counts
df['Even Count'] = even_counts

odd_even_df = pd.DataFrame({'Odd Count': odd_counts, 'Even Count': even_counts})
print(odd_even_df.head())

# One unit-wide bin per possible count. Histogram bins are half-open except
# the last one, which also includes its right edge, so stopping the edges
# exactly at the largest observed count would lump the two highest counts
# into a single bar (e.g. 5 and 6 when a draw holds six numbers). The edges
# therefore run one past the largest count, and never shrink below 0..6.
largest_count = max(max(odd_counts), max(even_counts))
count_bins = list(range(0, max(largest_count + 2, 7)))

# Histogram of Odd vs. Even Counts
plt.figure(figsize=(12, 8), facecolor="#78C4C5")
ax = plt.gca()
# Hide every spine except the bottom one so only the x-axis line is drawn
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.set_facecolor("#78C4C5")

# Both distributions share the same axes and bin edges so they can be compared directly
sns.histplot(df['Odd Count'], bins=count_bins, kde=True, color='#143D60', label='Odd Numbers')
sns.histplot(df['Even Count'], bins=count_bins, kde=True, color='#D84040', label='Even Numbers')
plt.xlabel("Count", color='white')
plt.ylabel("Frequency", color='white')
plt.xticks(color='white')
plt.yticks(color='white')
plt.legend(facecolor="#78C4C5")
plt.show()