In [1]:
import random as rand
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder

In [2]:
class RandomTrafficIncident:
    def __init__(self, csv):
        self.df = pd.read_csv(csv)
    
    def generate_event(self):
        Direction = rand.choice(self.df['Type'])
        Location = rand.choice(self.df['Location'])
        Longitude = rand.choice(self.df['Longitude'])
        Latitude = rand.choice(self.df['Latitude'])
        Time = rand.choice(self.df['Time'])
        
        return {'Direction': Direction, \
                'Location': Location, \
                'Latitude': Latitude, \
                'Longitude': Longitude, \
                'Time': Time}

In [3]:
csv = 'data_mmda_traffic_spatial.csv'
random_traffic_event = RandomTrafficIncident(csv)
df = random_traffic_event.df
df.isnull().sum()

Date             0
Time             0
City             0
Location         0
Latitude         0
Longitude        0
High_Accuracy    0
Direction        0
Type             0
Lanes_Blocked    0
Involved         0
Tweet            0
Source           0
dtype: int64

In [4]:
keys = { }

In [5]:
X = df[['Direction', 'Location', 'Latitude', 'Longitude']]
enc = LabelEncoder()
encoders = {}
X_encoded = pd.DataFrame()

for feature in X.columns:
    encoders[feature] = LabelEncoder()
    X_encoded[feature] = encoders[feature].fit_transform(X[feature])


In [6]:

for item in X_encoded:
    keys[item] = { }
    for value in np.nditer(X_encoded[item]):
        value = value.tolist()
        temp = encoders[item].inverse_transform([value])[0]
        keys[item][temp] = value

for key in keys:
    print(keys[key])


{'EB': 2, 'NB': 4, 'SB': 5, 'EB.': 3, 'WB': 6, 'DAR': 1, 'CLARA': 0}
{'ORTIGAS EMERALD': 1912, 'EDSA GUADIX': 980, 'EDSA ROCKWELL': 1262, 'ORTIGAS CLUB FILIPINO': 1899, 'C5 KALAYAAN': 335, 'EDSA ORTIGAS ROBINSONS': 1152, 'EDSA LIGHT MALL': 1022, 'EDSA FARMERS': 930, 'C5 LANUZA': 422, 'C5 ATENEO KATIPUNAN': 227, 'EDSA BONI': 833, 'MARCOS HIGHWAY LRT SANTOLAN': 1632, 'EDSA HERITAGE': 985, 'C5 ORTIGAS FLYOVER': 473, 'EDSA ERMIN GARCIA': 913, 'COMMONWEALTH DILIMAN': 537, 'C5 EASTWOOD': 294, 'NORTH AVE. AGHAM': 1817, 'MARCOS HIGHWAY LIGAYA': 1629, 'QUEZON AVENUE': 2099, 'EDSA AYALA TUNNEL': 806, 'EDSA NEPA Q-MART': 1105, 'EDSA P. TUAZON': 1171, 'C5 GREENMEADOWS': 312, 'COMMONWEALTH LUZON': 580, 'LUZON F/O': 1576, 'ELLIPTICAL': 1403, 'EDSA P. TUAZON TUNNEL': 1173, 'EDSA ESTRELLA': 920, 'EDSA RELIANCE': 1244, 'QUEZON AVE. AGHAM': 2059, 'ROXAS BLVD. BUENDIA AVE.': 2228, 'B. SERRANO KATIPUNAN TUNNEL': 161, 'EDSA BUENDIA': 857, 'EDSA BUENDIA SPLIT': 869, 'EDSA PET PLANS': 1183, 'C5 LIBIS': 429, 

In [7]:

X_encoded.head()
y_encoded = enc.fit_transform(df['Time'])
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y_encoded, test_size=0.2, random_state=42)
model = RandomForestClassifier()
model.fit(X_train.astype(str), y_train)
y_pred = model.predict(X_test.astype(str))
mse = mean_squared_error(y_test, y_pred)
print(X_encoded)


       Direction  Location  Latitude  Longitude
0              2      1912       269        738
1              4       980       301        675
2              5      1262       130        472
3              4       980       301        675
4              2      1899       380        545
...          ...       ...       ...        ...
15309          5      2119       261        134
15310          4       920       130        472
15311          5      1163       491        617
15312          6      1856       324        692
15313          6      1569       361        171

[15314 rows x 4 columns]


In [8]:
keys['Time'] = { }
for value in np.nditer(y_encoded):
    value = value.tolist()
    keys['Time'][value] = enc.inverse_transform([value])[0]
keys['Time']

{1139: '7:55 AM',
 1240: '8:42 AM',
 1304: '9:13 AM',
 53: '10:27 AM',
 155: '11:18 AM',
 307: '12:33 PM',
 319: '12:39 PM',
 543: '2:51 PM',
 684: '4:14 PM',
 813: '5:17 PM',
 945: '6:21 PM',
 1065: '7:19 PM',
 1117: '7:44 PM',
 1274: '8:58 PM',
 1036: '7:05 AM',
 0: '10:00 AM',
 93: '10:47 AM',
 560: '3:03 PM',
 942: '6:20 AM',
 963: '6:30 AM',
 982: '6:39 AM',
 986: '6:41 AM',
 988: '6:42 AM',
 1000: '6:48 AM',
 1058: '7:16 AM',
 1066: '7:20 AM',
 1069: '7:21 AM',
 1082: '7:27 AM',
 1183: '8:15 AM',
 1225: '8:35 AM',
 1273: '8:58 AM',
 1275: '8:59 AM',
 1342: '9:32 AM',
 29: '10:15 AM',
 73: '10:37 AM',
 101: '10:51 AM',
 115: '10:58 AM',
 432: '1:40 PM',
 471: '2:05 PM',
 497: '2:21 PM',
 531: '2:43 PM',
 532: '2:44 PM',
 541: '2:50 PM',
 635: '3:47 PM',
 693: '4:18 PM',
 695: '4:19 PM',
 738: '4:40 PM',
 746: '4:44 PM',
 829: '5:24 PM',
 898: '5:58 PM',
 905: '6:02 PM',
 951: '6:24 PM',
 1168: '8:07 PM',
 814: '5:18 AM',
 1137: '7:54 AM',
 1169: '8:08 AM',
 1219: '8:32 AM',
 1246:

In [11]:
new_raw = {'Direction': ['EB', 'NB'], \
           'Location': ['ATENEO KATIPUNAN', 'EDSA MAIN AVE'], \
           'Latitude': [14.638481, 14.537979], \
           'Longitude': [121.07454, 121.053801]}

for item in new_raw:
    temp = [ ]
    for value in new_raw[item]:
        temp.append(keys[item][value])
    new_raw[item] = temp

In [12]:

new_data = pd.DataFrame(new_raw)
new_data_encoded = pd.DataFrame()
print(new_data)

   Direction  Location  Latitude  Longitude
0          2        74       688        869
1          4      1050        72        631


In [14]:
for feature in new_data.columns:
    new_data_encoded[feature] = encoders[feature].fit_transform(new_data[feature])
enc_new_predictions = model.predict(new_data_encoded.astype(str))
new_predictions = [ ]
for value in enc_new_predictions:
    new_predictions.append(keys['Time'][value])
print("New Data Predictions:", new_predictions)

New Data Predictions: ['8:28 PM', '8:28 PM']
