In [1]:
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
#Import package matplotlib for visualisation/plotting
# import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.tree import export_graphviz
import xgboost as xgb
from xgboost import XGBRegressor
import time
import pickle
from main.models import ZoneDetail
import datetime
import os
import django
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'rest.settings')
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
django.setup()

In [2]:
zone = ZoneDetail.objects.filter(datetime__date=datetime.date(2023, 7, 27))

In [3]:
df = pd.DataFrame.from_records(zone.values())

In [4]:
df.head(5)

Unnamed: 0,zone_time_id,taxi_zone_id,datetime,impression_history,impression_predict,year_month,week,hour,borough,entertainment_and_recreation,...,parking_and_automotive_services,professional_services,real_estate,retail_services,transportation,hospital,hotspots,school,total_business,holiday
0,529,2,2023-07-27 10:00:00+00:00,0.0,0,2023-07,3,6,Queens,0.0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,No
1,530,2,2023-07-27 11:00:00+00:00,0.0,0,2023-07,3,7,Queens,0.0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,No
2,531,2,2023-07-27 12:00:00+00:00,0.0,0,2023-07,3,8,Queens,0.0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,No
3,533,2,2023-07-27 14:00:00+00:00,0.0,0,2023-07,3,10,Queens,0.0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,No
4,523,2,2023-07-27 04:00:00+00:00,0.0,0,2023-07,3,0,Queens,0.0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,No


In [5]:
# df = df.drop(['zone_time_id','impression_history','datetime'], axis=1)

In [6]:
df['year_month'] = df['year_month'].replace('2023-07','7')

In [7]:
df.rename(columns={'taxi_zone_id': 'taxi_zone', 'impression_predict': 'passenger_count','year_month': 'month'}, inplace=True)

In [8]:
df['taxi_zone'] = df['taxi_zone'].astype('category')
df['month'] = df['month'].astype('category')
df['week'] = df['week'].astype('category')
df['hour'] = df['hour'].astype('category')
df['holiday'] = df['holiday'].astype('category')
df['borough'] = df['borough'].astype('category')

df.dtypes

zone_time_id                                     int64
taxi_zone                                     category
datetime                           datetime64[ns, UTC]
impression_history                             float64
passenger_count                                  int64
month                                         category
week                                          category
hour                                          category
borough                                       category
entertainment_and_recreation                   float64
financial_services                             float64
food_and_beverage                              float64
parking_and_automotive_services                float64
professional_services                          float64
real_estate                                    float64
retail_services                                float64
transportation                                 float64
hospital                                       float64
hotspots  

In [9]:
df['taxi_zone'] = pd.Categorical(df['taxi_zone'], categories=['1','2','3','4','5','6','7','8','9','10',
                                                              '11','12','13','14','15','16','17','18','19','20',
                                                              '21','22','23','24','25','26','27','28','29','30',
                                                              '31','32','33','34','35','36','37','38','39','40',
                                                              '41','42','43','44','45','46','47','48','49','50',
                                                              '51','52','53','54','55','56','57','58','59','60',
                                                              '61','62','63','64','65','66','67','68','69','70',
                                                              '71','72','73','74','75','76','77','78','79','80',
                                                              '81','82','83','84','85','86','87','88','89','90',
                                                              '91','92','93','94','95','96','97','98','99','100',
                                                              '101','102','105','106','107','108','109','110',
                                                              '111','112','113','114','115','116','117','118',
                                                              '119','120','121','122','123','124','125','126',
                                                              '127','128','129','130','131','132','133','134',
                                                              '135','136','137','138','139','140','141','142',
                                                              '143','144','145','146','147','148','149','150',
                                                              '151','152','153','154','155','156','157','158',
                                                              '159','160','161','162','163','164','165','166',
                                                              '167','168','169','170','171','172','173','174',
                                                              '175','176','177','178','179','180','181','182',
                                                              '183','184','185','186','187','188','189','190',
                                                              '191','192','193','194','195','196','197','198',
                                                              '199','200','201','202','203','204','205','206',
                                                              '207','208','209','210','211','212','213','214',
                                                              '215','216','217','218','219','220','221','222',
                                                              '223','224','225','226','227','228','229','230',
                                                              '231','232','233','234','235','236','237','238',
                                                              '239','240','241','242','243','244','245','246',
                                                              '247','248','249','250','251','252','253','254',
                                                              '255','256','257','258','259','260','261','262',
                                                              '263'])

In [10]:
df['week'] = pd.Categorical(df['week'], categories=['0','1','2','3','4','5','6'])

In [11]:
df['hour'] = pd.Categorical(df['hour'], categories=['0','1','2','3','4','5','6','7','8','9','10','11','12',
                                                    '13','14','15','16','17','18','19','20','21','22','23'])

In [12]:
df['borough'] = pd.Categorical(df['borough'], categories=['Bronx', 'Brooklyn', 'EWR', 'Manhattan', 'Queens', 
                                                          'Staten Island'])

In [13]:
df['month'] = pd.Categorical(df['month'], categories=['1','2','3','4','5','6','7','8','9','10','11','12'])

In [14]:
df['holiday'] = pd.Categorical(df['holiday'], categories=['Christmas Day','Christmas Day (Observed)',
                                                          'Columbus Day','Independence Day','Labor Day',
                                                          'Martin Luther King Jr. Day', 'Memorial Day',
                                                          "New Year's Day", "New Year's Day (Observed)","No",
                                                          "Thanksgiving","Veterans Day","Washington's Birthday"
                                                         ])

In [15]:
df_pk = df[['zone_time_id','impression_history','datetime']]
df = df.drop(['zone_time_id','impression_history','datetime'], axis=1)

In [16]:
# set up dummies features
df_dummy = pd.get_dummies(df)
df_dummy.shape

(3027, 336)

In [17]:
df.shape

(3027, 19)

In [18]:
for col in df_dummy.columns:
    print(col)

passenger_count
entertainment_and_recreation
financial_services
food_and_beverage
parking_and_automotive_services
professional_services
real_estate
retail_services
transportation
hospital
hotspots
school
total_business
taxi_zone_1
taxi_zone_2
taxi_zone_3
taxi_zone_4
taxi_zone_5
taxi_zone_6
taxi_zone_7
taxi_zone_8
taxi_zone_9
taxi_zone_10
taxi_zone_11
taxi_zone_12
taxi_zone_13
taxi_zone_14
taxi_zone_15
taxi_zone_16
taxi_zone_17
taxi_zone_18
taxi_zone_19
taxi_zone_20
taxi_zone_21
taxi_zone_22
taxi_zone_23
taxi_zone_24
taxi_zone_25
taxi_zone_26
taxi_zone_27
taxi_zone_28
taxi_zone_29
taxi_zone_30
taxi_zone_31
taxi_zone_32
taxi_zone_33
taxi_zone_34
taxi_zone_35
taxi_zone_36
taxi_zone_37
taxi_zone_38
taxi_zone_39
taxi_zone_40
taxi_zone_41
taxi_zone_42
taxi_zone_43
taxi_zone_44
taxi_zone_45
taxi_zone_46
taxi_zone_47
taxi_zone_48
taxi_zone_49
taxi_zone_50
taxi_zone_51
taxi_zone_52
taxi_zone_53
taxi_zone_54
taxi_zone_55
taxi_zone_56
taxi_zone_57
taxi_zone_58
taxi_zone_59
taxi_zone_60
taxi_zone_

In [19]:
# split data set into the features and target feature
target_feature = pd.DataFrame(df_dummy["passenger_count"])
features = df_dummy.drop(labels=["passenger_count"], axis=1)
features

Unnamed: 0,entertainment_and_recreation,financial_services,food_and_beverage,parking_and_automotive_services,professional_services,real_estate,retail_services,transportation,hospital,hotspots,...,holiday_Independence Day,holiday_Labor Day,holiday_Martin Luther King Jr. Day,holiday_Memorial Day,holiday_New Year's Day,holiday_New Year's Day (Observed),holiday_No,holiday_Thanksgiving,holiday_Veterans Day,holiday_Washington's Birthday
0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
1,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
2,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
3,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
4,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3022,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
3023,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
3024,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
3025,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False


In [20]:
target_features=df_dummy[['passenger_count']]

In [21]:
print(type(target_features))

<class 'pandas.core.frame.DataFrame'>


# run pickle

In [22]:
loaded_model = pickle.load(open('basic_XGboost_model_2.2.1.pkl', 'rb'))
loaded_model

In [23]:
features

Unnamed: 0,entertainment_and_recreation,financial_services,food_and_beverage,parking_and_automotive_services,professional_services,real_estate,retail_services,transportation,hospital,hotspots,...,holiday_Independence Day,holiday_Labor Day,holiday_Martin Luther King Jr. Day,holiday_Memorial Day,holiday_New Year's Day,holiday_New Year's Day (Observed),holiday_No,holiday_Thanksgiving,holiday_Veterans Day,holiday_Washington's Birthday
0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
1,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
2,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
3,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
4,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3022,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
3023,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
3024,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
3025,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False


In [24]:
# let's say you have some data in X_test
predictions = loaded_model.predict(features)
predictions[predictions < 0] = 0
predictions = predictions.astype(int)
# print predictions
print(predictions)

[0 0 0 ... 0 0 0]


In [25]:
print(features.columns)

Index(['entertainment_and_recreation', 'financial_services',
       'food_and_beverage', 'parking_and_automotive_services',
       'professional_services', 'real_estate', 'retail_services',
       'transportation', 'hospital', 'hotspots',
       ...
       'holiday_Independence Day', 'holiday_Labor Day',
       'holiday_Martin Luther King Jr. Day', 'holiday_Memorial Day',
       'holiday_New Year's Day', 'holiday_New Year's Day (Observed)',
       'holiday_No', 'holiday_Thanksgiving', 'holiday_Veterans Day',
       'holiday_Washington's Birthday'],
      dtype='object', length=335)


In [26]:
# Convert predictions to a DataFrame
predictions_df = pd.DataFrame(predictions, columns=['predicted_passenger_count'])

# Reset indices of your target_feature, features DataFrames to align with predictions_df
# target_feature.reset_index(drop=True, inplace=True)
# features.reset_index(drop=True, inplace=True)

# Concatenate the target_feature, features and predictions dataframes
result = pd.concat([df_pk, predictions_df, target_feature, features], axis=1)

result

Unnamed: 0,zone_time_id,impression_history,datetime,predicted_passenger_count,passenger_count,entertainment_and_recreation,financial_services,food_and_beverage,parking_and_automotive_services,professional_services,...,holiday_Independence Day,holiday_Labor Day,holiday_Martin Luther King Jr. Day,holiday_Memorial Day,holiday_New Year's Day,holiday_New Year's Day (Observed),holiday_No,holiday_Thanksgiving,holiday_Veterans Day,holiday_Washington's Birthday
0,529,0.0,2023-07-27 10:00:00+00:00,0,0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
1,530,0.0,2023-07-27 11:00:00+00:00,0,0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
2,531,0.0,2023-07-27 12:00:00+00:00,0,0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
3,533,0.0,2023-07-27 14:00:00+00:00,0,0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
4,523,0.0,2023-07-27 04:00:00+00:00,0,0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3022,3545,,2023-07-28 02:00:00+00:00,0,0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
3023,3546,,2023-07-28 03:00:00+00:00,0,0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,True,False,False,False
3024,3547,,2023-07-27 04:00:00+00:00,0,0,0.0,0.0,0.0,0.0,1.0,...,False,False,False,False,False,False,True,False,False,False
3025,3548,,2023-07-27 05:00:00+00:00,0,0,0.0,0.0,0.0,0.0,1.0,...,False,False,False,False,False,False,True,False,False,False


In [27]:
result['predicted_passenger_count'].unique()

array([ 0,  1,  6,  5,  3,  7, 10,  4,  8,  2, 11, 15,  9])

# Write prediction result in database

In [29]:

# Step 2: Iterate over the DataFrame rows and the objects to update
for index, row in result.iterrows():
#     if row['zone_time_id'] > 2 and row['column_name_2'] == 'C':
#         # Modify the field_to_update value based on your requirement
#         new_value = 'new_value'
    zone.filter(zone_time_id=row['zone_time_id']).update(impression_predict=row['predicted_passenger_count'])
    print(row['zone_time_id'],'updated')

529 updated
530 updated
531 updated
533 updated
523 updated
524 updated
525 updated
526 updated
527 updated
528 updated
541 updated
542 updated
543 updated
535 updated
536 updated
537 updated
538 updated
539 updated
540 updated
708 updated
709 updated
545 updated
546 updated
710 updated
707 updated
1930 updated
1931 updated
1932 updated
713 updated
714 updated
711 updated
534 updated
712 updated
1934 updated
1935 updated
1936 updated
1937 updated
718 updated
719 updated
532 updated
544 updated
691 updated
715 updated
716 updated
717 updated
720 updated
721 updated
722 updated
547 updated
548 updated
549 updated
550 updated
551 updated
552 updated
553 updated
554 updated
555 updated
556 updated
557 updated
558 updated
559 updated
560 updated
561 updated
562 updated
563 updated
564 updated
565 updated
566 updated
567 updated
568 updated
569 updated
570 updated
572 updated
573 updated
574 updated
575 updated
576 updated
1927 updated
578 updated
579 updated
580 updated
581 updated
582 upda

2052 updated
2053 updated
2054 updated
2055 updated
2056 updated
2057 updated
2058 updated
1071 updated
1072 updated
1073 updated
1074 updated
1075 updated
1076 updated
1077 updated
1078 updated
1079 updated
1080 updated
2035 updated
2044 updated
2059 updated
2060 updated
1061 updated
1062 updated
1063 updated
1064 updated
1065 updated
1066 updated
1067 updated
1068 updated
1069 updated
1070 updated
1081 updated
1082 updated
1083 updated
1084 updated
1085 updated
1086 updated
1087 updated
1088 updated
1089 updated
1090 updated
1091 updated
1092 updated
1093 updated
1094 updated
1095 updated
1096 updated
1097 updated
1098 updated
1099 updated
1100 updated
1101 updated
1102 updated
1103 updated
1104 updated
1105 updated
1106 updated
1107 updated
1108 updated
1109 updated
1110 updated
1111 updated
1112 updated
1113 updated
1114 updated
1115 updated
1116 updated
1117 updated
1118 updated
1119 updated
1120 updated
1121 updated
1122 updated
1123 updated
1124 updated
1125 updated
1126 updated

1628 updated
1629 updated
1630 updated
1631 updated
1632 updated
1633 updated
1634 updated
1635 updated
1636 updated
1637 updated
1638 updated
1639 updated
1640 updated
1641 updated
1642 updated
1643 updated
1644 updated
1645 updated
1646 updated
1647 updated
1648 updated
1649 updated
1650 updated
1652 updated
1653 updated
1654 updated
1655 updated
1656 updated
1657 updated
1658 updated
1659 updated
1660 updated
1661 updated
1662 updated
1663 updated
1664 updated
1665 updated
1651 updated
1666 updated
1667 updated
1668 updated
1669 updated
1670 updated
1671 updated
1672 updated
1673 updated
1674 updated
1675 updated
1676 updated
1677 updated
1678 updated
1679 updated
1680 updated
1681 updated
1682 updated
1683 updated
1684 updated
1685 updated
1686 updated
1687 updated
1688 updated
1689 updated
1690 updated
1691 updated
1692 updated
1693 updated
1694 updated
1695 updated
1696 updated
1697 updated
2116 updated
2117 updated
2118 updated
2119 updated
2120 updated
2121 updated
2122 updated

2449 updated
2450 updated
2451 updated
2452 updated
2453 updated
2454 updated
2455 updated
2456 updated
2457 updated
2458 updated
2459 updated
2460 updated
2461 updated
2462 updated
2463 updated
2464 updated
2465 updated
2466 updated
2467 updated
2468 updated
2469 updated
2470 updated
2471 updated
2472 updated
2473 updated
2474 updated
2475 updated
2476 updated
2477 updated
2478 updated
2479 updated
2480 updated
2481 updated
2482 updated
2483 updated
2484 updated
2485 updated
2486 updated
2487 updated
2488 updated
2489 updated
2490 updated
2491 updated
2492 updated
2493 updated
2494 updated
2495 updated
2496 updated
2497 updated
2498 updated
2499 updated
2500 updated
2501 updated
2502 updated
2503 updated
2504 updated
2505 updated
2506 updated
2507 updated
2508 updated
2509 updated
2510 updated
2511 updated
2512 updated
2513 updated
2514 updated
2515 updated
2516 updated
2517 updated
2518 updated
2519 updated
2520 updated
2521 updated
2522 updated
2523 updated
2524 updated
2525 updated

3085 updated
3086 updated
3075 updated
3076 updated
3077 updated
3078 updated
3079 updated
3080 updated
3087 updated
3088 updated
3089 updated
3090 updated
3091 updated
3092 updated
3093 updated
3094 updated
3095 updated
3096 updated
3097 updated
3098 updated
3099 updated
3100 updated
3101 updated
3102 updated
3103 updated
3104 updated
3105 updated
3106 updated
3107 updated
3108 updated
3109 updated
3110 updated
3111 updated
3112 updated
3113 updated
3114 updated
3115 updated
3116 updated
3117 updated
3118 updated
3119 updated
3120 updated
3121 updated
3122 updated
3123 updated
3124 updated
3125 updated
3126 updated
3127 updated
3128 updated
3129 updated
3130 updated
3131 updated
3132 updated
3133 updated
3134 updated
3135 updated
3136 updated
3137 updated
3138 updated
3139 updated
3140 updated
3141 updated
3142 updated
3143 updated
3144 updated
3145 updated
3146 updated
3147 updated
3148 updated
3149 updated
3150 updated
3151 updated
3152 updated
3153 updated
3154 updated
3155 updated