<a href="https://colab.research.google.com/github/kerrfat/clients/blob/main/pbprediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from datetime import datetime
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import chi2_contingency
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.seasonal import seasonal_decompose

# Load and prepare data
df = pd.read_csv('data/data.csv', parse_dates=['Draw Date'])
df = df.sort_values('Draw Date').reset_index(drop=True)

# Feature engineering
df['Year'] = df['Draw Date'].dt.year
df['Month'] = df['Draw Date'].dt.month
df['Day'] = df['Draw Date'].dt.day
df['DayOfWeek'] = df['Draw Date'].dt.dayofweek
df['DayOfYear'] = df['Draw Date'].dt.dayofyear
df['WeekOfYear'] = df['Draw Date'].dt.isocalendar().week

# Define balls
white_balls = ['Number_1', 'Number_2', 'Number_3', 'Number_4', 'Number_5']
powerball = 'Number_6'

# 1. Frequency Analysis
plt.figure(figsize=(15, 10))
plt.subplot(2, 1, 1)
all_white = df[white_balls].values.flatten()
sns.histplot(all_white, bins=69, kde=True)
plt.title('White Ball Frequency Distribution')
plt.xlabel('Number')
plt.ylabel('Frequency')

plt.subplot(2, 1, 2)
sns.histplot(df[powerball], bins=26, kde=True)
plt.title('Powerball Frequency Distribution')
plt.xlabel('Number')
plt.ylabel('Frequency')
plt.tight_layout()
plt.show()

# 2. Temporal Analysis
# Monthly patterns
monthly_avg = df.groupby('Month')[white_balls].mean()
plt.figure(figsize=(12, 6))
sns.heatmap(monthly_avg.T, cmap='viridis', annot=True, fmt=".1f")
plt.title('Average White Ball Numbers by Month')
plt.xlabel('Month')
plt.ylabel('Ball Position')
plt.show()

# Yearly trends
yearly_avg = df.groupby('Year')[white_balls].mean()
plt.figure(figsize=(12, 6))
for i, ball in enumerate(white_balls):
    plt.plot(yearly_avg.index, yearly_avg[ball], label=f'Ball {i+1}')
plt.title('Yearly Trends in White Ball Numbers')
plt.xlabel('Year')
plt.ylabel('Average Number')
plt.legend()
plt.grid(True)
plt.show()

# 3. Combinatorial Analysis
# Odd/even distribution
df['OddCount'] = df[white_balls].apply(lambda row: sum(n % 2 != 0 for n in row), axis=1)
odd_even_dist = df['OddCount'].value_counts().sort_index()

# Number clustering
kmeans = KMeans(n_clusters=3, random_state=42)
df['Cluster'] = kmeans.fit_predict(df[white_balls])

# Pair analysis
all_pairs = []
for _, row in df.iterrows():
    nums = sorted(row[white_balls])
    all_pairs.extend(list(combinations(nums, 2)))
pair_counts = pd.Series(all_pairs).value_counts().head(10)

# 4. Time Series Decomposition
# Create a date range for complete time series
date_range = pd.date_range(start=df['Draw Date'].min(), end=df['Draw Date'].max(), freq='D')
ts_df = pd.DataFrame(index=date_range)
ts_df = ts_df.join(df.set_index('Draw Date')['Number_1'], how='left').fillna(method='ffill')

# Decompose time series
decomposition = seasonal_decompose(ts_df, period=365)  # yearly seasonality
decomposition.plot()
plt.suptitle('Time Series Decomposition of First White Ball')
plt.tight_layout()
plt.show()

# 5. Predictive Modeling
# Prepare data
X = df[['Year', 'Month', 'Day', 'DayOfWeek', 'DayOfYear']]
y_white = df[white_balls]
y_pb = df[powerball]

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train models
white_models = {}
for i, col in enumerate(white_balls):
    model = RandomForestClassifier(n_estimators=150, random_state=42)
    model.fit(X_scaled, df[col])
    white_models[col] = model

pb_model = RandomForestClassifier(n_estimators=150, random_state=42)
pb_model.fit(X_scaled, y_pb)

# Predict next draw
next_date = datetime.now()
next_features = pd.DataFrame([[next_date.year, next_date.month, next_date.day,
                              next_date.weekday(), next_date.timetuple().tm_yday]],
                            columns=X.columns)
next_scaled = scaler.transform(next_features)

white_preds = []
for col in white_balls:
    pred = white_models[col].predict(next_scaled)[0]
    white_preds.append(pred)

pb_pred = pb_model.predict(next_scaled)[0]

# 6. Generate Recommendations
sorted_preds = sorted(white_preds)
print("\n" + "="*50)
print(f"Predicted Numbers for Next Draw ({next_date.strftime('%Y-%m-%d')}):")
print(f"White Balls: {sorted_preds}")
print(f"Powerball: {pb_pred}")
print("="*50)

# Feature importance
plt.figure(figsize=(10, 6))
feature_importances = pd.Series(white_models['Number_1'].feature_importances_, index=X.columns)
feature_importances.sort_values().plot(kind='barh')
plt.title('Feature Importance for Number Prediction')
plt.show()

Predicted Numbers for Next Draw (2025-07-19):
*   White Balls: 4, 16, 32, 40, 57
*   Powerball: 4

