In [1]:
!pip install pandas scikit-learn



In [8]:
import pandas as pd
import random
from datetime import datetime, timedelta

# Configuration
start_date = datetime(2025, 4, 1)
end_date = datetime.today()
num_days = (end_date - start_date).days + 1  # inclusive of today

# Instruments categorized by usage frequency
high_usage = ['Piano', 'Guitar']
medium_usage = ['Violin', 'Drums', 'Clarinet', 'Saxophone', 'Harp']
low_usage = ['Trumpet', 'Flute', 'Cello']

all_instruments = high_usage + medium_usage + low_usage

# Generate data
data = []
for day_offset in range(num_days):
    date = start_date + timedelta(days=day_offset)
    weekday = date.strftime('%A')

    for instrument in all_instruments:
        if instrument in high_usage:
            usage_count = random.randint(5, 8)
        elif instrument in medium_usage:
            usage_count = random.randint(2, 5)
        else:  # low_usage
            usage_count = random.randint(0, 2)

        data.append({
            'date': date.strftime('%Y-%m-%d'),
            'weekday': weekday,
            'instrument': instrument,
            'usage_count': usage_count
        })

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('instrument_usage.csv', index=False)

# Preview first few rows
print(df.head(20))


          date    weekday instrument  usage_count
0   2025-04-01    Tuesday      Piano            6
1   2025-04-01    Tuesday     Guitar            5
2   2025-04-01    Tuesday     Violin            4
3   2025-04-01    Tuesday      Drums            4
4   2025-04-01    Tuesday   Clarinet            2
5   2025-04-01    Tuesday  Saxophone            3
6   2025-04-01    Tuesday       Harp            5
7   2025-04-01    Tuesday    Trumpet            0
8   2025-04-01    Tuesday      Flute            0
9   2025-04-01    Tuesday      Cello            2
10  2025-04-02  Wednesday      Piano            8
11  2025-04-02  Wednesday     Guitar            7
12  2025-04-02  Wednesday     Violin            4
13  2025-04-02  Wednesday      Drums            3
14  2025-04-02  Wednesday   Clarinet            4
15  2025-04-02  Wednesday  Saxophone            3
16  2025-04-02  Wednesday       Harp            5
17  2025-04-02  Wednesday    Trumpet            1
18  2025-04-02  Wednesday      Flute            0


In [10]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import calendar

# Load data
df = pd.read_csv('instrument_usage.csv')

# Pivot the data
pivot = df.pivot_table(index=['date', 'weekday'], columns='instrument', values='usage_count', fill_value=0)
pivot = pivot.reset_index()
pivot = pd.get_dummies(pivot, columns=['weekday'])

# Extract instruments
instruments = df['instrument'].unique()

# Add target column
pivot['target'] = pivot[instruments].idxmax(axis=1)

# Create lag features
for instr in instruments:
    pivot[f'{instr}_lag1'] = pivot[instr].shift(1)

# Drop rows with NaNs from lag creation
pivot = pivot.dropna().reset_index(drop=True)

# Prepare data for training
X = pivot.drop(['date', 'target'], axis=1)
y = pivot['target']

# Save training weekday columns
weekday_cols = [col for col in X.columns if col.startswith('weekday_')]

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Train model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate model
y_pred = clf.predict(X_test)
print("Evaluation:\n", classification_report(y_test, y_pred))

# ---------------------------------------------
# 🔮 Predict for Any Specific Date
# ---------------------------------------------

def predict_instrument(target_date_str):
    target_date = pd.to_datetime(target_date_str)
    prev_date = target_date - pd.Timedelta(days=1)
    prev_date_str = prev_date.strftime('%Y-%m-%d')
    weekday = calendar.day_name[target_date.weekday()]

    # Attempt to get previous day's actual lag values
    prev_row = pivot[pivot['date'] == prev_date_str]
    if not prev_row.empty:
        input_row = prev_row[[f'{instr}_lag1' for instr in instruments]].copy()
        input_row.columns = [instr for instr in instruments]
    else:
        # Use average weekday usage if previous day is missing
        weekday_df = df[df['weekday'] == weekday].groupby('instrument')['usage_count'].mean()
        input_row = pd.DataFrame(columns=instruments)
        for instr in instruments:
            input_row.at[0, instr] = weekday_df.get(instr, df[df['instrument'] == instr]['usage_count'].mean())

    # Add weekday one-hot columns
    for col in weekday_cols:
        input_row[col] = 1 if col == f'weekday_{weekday}' else 0

    # Fill missing weekday columns
    for col in X.columns:
        if col not in input_row.columns:
            input_row[col] = 0

    # Match column order
    input_row = input_row[X.columns]

    # Predict
    predicted_instrument = clf.predict(input_row)[0]
    print(f"🎯 Predicted instrument with highest usage on {target_date.date()}: {predicted_instrument}")

# 🔍 Try predicting any date
predict_instrument('2025-07-15')
predict_instrument('2025-08-01')
predict_instrument('2025-09-01')


Evaluation:
               precision    recall  f1-score   support

      Guitar       0.86      1.00      0.92         6
       Piano       1.00      0.94      0.97        17

    accuracy                           0.96        23
   macro avg       0.93      0.97      0.95        23
weighted avg       0.96      0.96      0.96        23

🎯 Predicted instrument with highest usage on 2025-07-15: Piano
🎯 Predicted instrument with highest usage on 2025-08-01: Guitar
🎯 Predicted instrument with highest usage on 2025-09-01: Guitar
