In [3]:
import pandas as pd
import json

In [4]:
# Read the raw cases from disk.
with open('private_cases.json', 'r') as cases_file:
    data = json.load(cases_file)

# Copy every loaded entry into a list, unchanged (no reshaping happens here).
records = [case for case in data]

# Build the working DataFrame from the collected records.
df = pd.DataFrame(records)

def ends_with_49(x):
    """Return True when the fractional part of ``x``, rounded to 2 decimals, is 0.49.

    The fractional part is taken with ``x % 1``, so for negative ``x`` it is
    measured upward from the next lower integer (Python modulo semantics).
    """
    fractional_part = x % 1
    cents = round(fractional_part, 2)
    return cents == 0.49

def ends_with_99(x):
    """Return True when the fractional part of ``x``, rounded to 2 decimals, is 0.99.

    The fractional part is taken with ``x % 1``, so for negative ``x`` it is
    measured upward from the next lower integer (Python modulo semantics).
    """
    fractional_part = x % 1
    cents = round(fractional_part, 2)
    return cents == 0.99

def ends_with_6(x):
    """Return True when the integer part of ``x`` has 6 as its last decimal digit.

    ``x`` is truncated toward zero with ``int`` before the digit is extracted,
    so any fractional part is ignored.
    """
    whole_part = int(x)
    last_digit = whole_part % 10
    return last_digit == 6

# --- Ratio features ----------------------------------------------------------
# Vectorized Series division replaces the per-row Python loop. Iterating a
# Series yields plain Python floats, so the old `x / y` raised
# ZeroDivisionError on any row with a zero denominator and aborted the whole
# cell. Series division produces inf / NaN for such rows instead.
df['miles_traveled_per_day'] = df['miles_traveled'] / df['trip_duration_days']
df['total_receipts_amount_per_day'] = df['total_receipts_amount'] / df['trip_duration_days']
df['total_receipts_amount_per_mile'] = df['total_receipts_amount'] / df['miles_traveled']

# --- Receipt "cents" flags ---------------------------------------------------
# The helpers rely on Python's round(), so they are applied per value to keep
# the exact same rounding behaviour.
df['ends_with_49'] = [int(ends_with_49(x)) for x in df['total_receipts_amount']]
df['ends_with_99'] = [int(ends_with_99(x)) for x in df['total_receipts_amount']]
# Both inputs are 0/1 integer columns, so bitwise OR gives the combined 0/1 flag.
df['ends_with_49_99'] = df['ends_with_49'] | df['ends_with_99']

# --- Trip-length indicator flags (0/1) ---------------------------------------
# Columns are created in the same order as before so the frame layout is
# unchanged.
trip_days = df['trip_duration_days']
df['day_5'] = (trip_days == 5).astype('int64')
df['day_6'] = (trip_days == 6).astype('int64')
df['day_5_or_6'] = trip_days.isin([5, 6]).astype('int64')
for _n_days in range(7, 14):  # day_7 ... day_13
    df[f'day_{_n_days}'] = (trip_days == _n_days).astype('int64')
df['day_7_or_8'] = trip_days.isin([7, 8]).astype('int64')

# Remainder of the receipt amount modulo 0.50, scaled by 100.
# NOTE(review): 'recidual' looks like a misspelling of 'residual'; the column
# name is kept because other code may reference it.
# NOTE(review): multiplying after round() can reintroduce float noise
# (e.g. 0.29 * 100 -> 28.999999999999996); left unchanged to keep values stable.
df['recidual'] = [round(x % 0.5, 2) * 100 for x in df['total_receipts_amount']]

# --- Threshold flags (0/1); all bounds are exclusive -------------------------
miles_per_day = df['miles_traveled_per_day']
receipts_total = df['total_receipts_amount']
miles_total = df['miles_traveled']

df['sweet_miles_traveled_per_day'] = ((miles_per_day > 180) & (miles_per_day < 220)).astype('int64')
df['medium_high_spending'] = ((receipts_total > 600) & (receipts_total < 800)).astype('int64')
df['bonus'] = ((miles_per_day > 180) & (df['total_receipts_amount_per_day'] < 100)).astype('int64')
# NOTE(review): the column name says 1050 but the upper bound used is 1100 —
# confirm which one is intended.
df['miles_900_1050'] = ((miles_total > 900) & (miles_total < 1100)).astype('int64')
df['ends_with_6'] = [int(ends_with_6(x)) for x in df['miles_traveled']]

# Display first few rows
df.head()

Unnamed: 0,trip_duration_days,miles_traveled,total_receipts_amount,miles_traveled_per_day,total_receipts_amount_per_day,total_receipts_amount_per_mile,ends_with_49,ends_with_99,ends_with_49_99,day_5,...,day_11,day_12,day_13,day_7_or_8,recidual,sweet_miles_traveled_per_day,medium_high_spending,bonus,miles_900_1050,ends_with_6
0,3,191.0,21.87,63.666667,7.29,0.114503,0,0,0,0,...,0,0,0,0,37.0,0,0,0,0,0
1,1,81.0,3.93,81.0,3.93,0.048519,0,0,0,0,...,0,0,0,0,43.0,0,0,0,0,0
2,3,204.0,16.76,68.0,5.586667,0.082157,0,0,0,0,...,0,0,0,0,26.0,0,0,0,0,0
3,1,64.0,22.47,64.0,22.47,0.351094,0,0,0,0,...,0,0,0,0,47.0,0,0,0,0,0
4,3,204.0,22.19,68.0,7.396667,0.108775,0,0,0,0,...,0,0,0,0,19.0,0,0,0,0,0


In [5]:
# Columns passed to the model, in this exact order.
FEATURE_COLUMNS = [
    'trip_duration_days',
    'miles_traveled',
    'total_receipts_amount',
    'miles_traveled_per_day',
    'total_receipts_amount_per_day',
    'day_5',
    'day_6',
    'day_7_or_8',
    'ends_with_49_99',
    'sweet_miles_traveled_per_day',
]
X = df[FEATURE_COLUMNS]


In [6]:
import joblib
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code
# while loading — only load model files that come from a trusted source.
MODEL_PATH = 'gbr_model_final.pkl'
loaded_model = joblib.load(MODEL_PATH)

In [8]:
# Run the loaded model over the selected feature columns in X.
predictions = loaded_model.predict(X)

In [10]:
# Write one prediction per line, rounded to 2 decimals and floored at 1.
with open("private_results.txt", "w") as results_file:
    for raw_prediction in predictions:
        rounded = round(float(raw_prediction), 2)
        # NOTE(review): the floor is the int 1, so floored rows are written as
        # "1" rather than "1.0" — confirm the consumer accepts both forms.
        floored = max(1, rounded)
        results_file.write(f"{floored}\n")