In [15]:
import pandas as pd
from models.sentiment_model import SentimentModel
from models.opportunity_scorer import OpportunityScorer
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [16]:
# Read the three mock datasets (business, economic, social media) from JSON,
# in that order, into one DataFrame each.
business_data, economic_data, social_data = (
    pd.read_json(json_path)
    for json_path in (
        'data/mock_data/business_data.json',
        'data/mock_data/economic_data.json',
        'data/mock_data/sm_data.json',
    )
)

In [17]:
# Put the social, business and economic frames side by side (column-wise).
# Rows are matched on index only, not on location or timestamp.
# NOTE(review): the preview below pairs e.g. sm_loc "Mumbai" with b_loc
# "Bengaluru" in row 0 - confirm that a positional pairing is intended.
combined_data = pd.concat(
    [social_data, business_data, economic_data],
    axis="columns",
)

In [18]:
# Preview the first five rows of the combined frame.
combined_data.head(5)

Unnamed: 0,sm_loc,sm_timestamp,text,sentiment_score,engagement_score,business_id,b_loc,type,revenue,age,rating,ec_timestamp,market_index,inflation_rate,interest_rate
0,Mumbai,1749586733170,This is a sample tweet about Mumbai.,0.665555,0.267257,B000,Bengaluru,Tech,94667.310209,5,4.774604,1749673133173,0.687877,0.020451,0.023174
1,Mumbai,1749500333170,This is a sample tweet about Mumbai.,0.660467,0.895652,B001,New Delhi,Tech,124431.850884,8,3.825776,1748290733173,0.687877,0.020451,0.023174
2,Mumbai,1750105133170,This is a sample tweet about Mumbai.,0.520147,0.718785,B002,Mumbai,Tech,48510.096086,2,3.687855,1748204333173,0.687877,0.020451,0.023174
3,New Delhi,1748117933170,This is a sample tweet about New Delhi.,0.935511,0.681257,B003,Bengaluru,Restaurant,108350.585687,2,4.012406,1747772333173,0.687877,0.020451,0.023174
4,Hyderabad,1749327533170,This is a sample tweet about Hyderabad.,0.412436,0.388872,B004,Hyderabad,Tech,90859.769894,14,3.122983,1749845933173,0.687877,0.020451,0.023174


In [23]:
# Build the model inputs (features) and the targets (labels) from combined_data.
#
# This cell stores the targets back into combined_data as a 'label' column.
# If the cell is executed again, that column would already be present when
# prepare_features() and the row-wise scorer run, so the target could leak
# into the inputs. Drop any stale copy first so a re-run behaves exactly like
# the first run.
if 'label' in combined_data.columns:
    combined_data = combined_data.drop(columns='label')

opportunity_scorer = OpportunityScorer()
features = opportunity_scorer.prepare_features(combined_data)

# Targets are produced row by row by the scorer's own scoring method.
# NOTE(review): the same object that generates these labels is later trained
# to predict them - presumably the model is relearning that scoring rule;
# confirm this is the intended evaluation setup.
combined_data['label'] = combined_data.apply(opportunity_scorer.generate_opportunity_score, axis=1)
labels = combined_data['label']

In [24]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
# NOTE(review): the split is not stratified, and the rarest class in the
# reported results has only 4 test rows - consider whether that is acceptable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

In [25]:
# Fit the scorer on the training split.
opportunity_scorer.train_model(X_train, y_train)
# Predict labels for the held-out rows via the scorer's `model` attribute
# (presumably set by train_model - confirm in OpportunityScorer).
y_pred = opportunity_scorer.model.predict(X_test)

In [26]:
# Report held-out performance: overall accuracy, per-class precision/recall/F1,
# and the confusion matrix (rows = true class, columns = predicted class).
# `!s` forces str() conversion, matching what print() does with its arguments.
print(f"Accuracy: {accuracy_score(y_test, y_pred)!s}")
print(f"Classification Report:\n {classification_report(y_test, y_pred)!s}")
print(f"Confusion Matrix:\n {confusion_matrix(y_test, y_pred)!s}")

Accuracy: 0.985
Classification Report:
               precision    recall  f1-score   support

           2       1.00      1.00      1.00        20
           3       1.00      1.00      1.00        55
           4       0.98      1.00      0.99        89
           5       1.00      0.91      0.95        32
           6       0.80      1.00      0.89         4

    accuracy                           0.98       200
   macro avg       0.96      0.98      0.97       200
weighted avg       0.99      0.98      0.98       200

Confusion Matrix:
 [[20  0  0  0  0]
 [ 0 55  0  0  0]
 [ 0  0 89  0  0]
 [ 0  0  2 29  1]
 [ 0  0  0  0  4]]
