In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the merged market data CSV.
# NOTE(review): the loop below reads the columns 'type_id', 'type_name',
# 'volume', 'highest', 'lowest' and 'average' from this file.
df = pd.read_csv(r'C:\Users\Jouke\Documents\evedata-logger\output\market_data_with_names_merged.csv')

# Features to use (customize as needed)
feature_cols = ['volume', 'highest', 'lowest']

# Target to predict
target_col = 'average'

# Fit and evaluate one independent linear regression per type_id.
for type_id, subdf in df.groupby('type_id'):
    # LinearRegression and mean_squared_error both reject NaN, so drop rows
    # that are missing any feature or the target up front. Without this a
    # single incomplete row raises ValueError and aborts the whole loop.
    complete = subdf.dropna(subset=feature_cols + [target_col])

    # Skip items with too few usable samples (counted after dropping
    # incomplete rows, so the threshold reflects what the model really sees).
    if len(complete) < 10:
        print(f"Skipping type_id {type_id}: not enough data.")
        continue

    # Prepare features and target
    X = complete[feature_cols]
    y = complete[target_col]

    # Hold out 20% of the rows for evaluation; the fixed seed keeps the
    # split reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Fit the model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict and evaluate on the held-out rows.
    # MSE is expressed in squared units of the target, so values are not
    # directly comparable between items whose targets differ in scale.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    # Display name is taken from the first row of the unfiltered group.
    type_name = subdf['type_name'].iloc[0]

    print(f"type_id: {type_id} | type_name: {type_name}")
    print(f"  Test samples: {len(y_test)}")
    print(f"  Mean Squared Error: {mse:.2f}\n")


type_id: 18 | type_name: Plagioclase
  Test samples: 73
  Mean Squared Error: 6.93

type_id: 19 | type_name: Spodumain
  Test samples: 71
  Mean Squared Error: 1057982.00

type_id: 20 | type_name: Kernite
  Test samples: 73
  Mean Squared Error: 751.88

type_id: 21 | type_name: Hedbergite
  Test samples: 73
  Mean Squared Error: 19370.76

type_id: 22 | type_name: Arkonor
  Test samples: 73
  Mean Squared Error: 11622.19

type_id: 34 | type_name: Tritanium
  Test samples: 73
  Mean Squared Error: 0.00

type_id: 35 | type_name: Pyerite
  Test samples: 73
  Mean Squared Error: 0.30

type_id: 36 | type_name: Mexallon
  Test samples: 73
  Mean Squared Error: 1.24

type_id: 37 | type_name: Isogen
  Test samples: 73
  Mean Squared Error: 12.89

type_id: 38 | type_name: Nocxium
  Test samples: 73
  Mean Squared Error: 142.12

type_id: 39 | type_name: Zydrine
  Test samples: 73
  Mean Squared Error: 527.57

type_id: 40 | type_name: Megacyte
  Test samples: 73
  Mean Squared Error: 11195.40

typ