In [30]:

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from textblob import TextBlob

# Load questions dataset
# The CSV location and the row cap are named once so that the primary read
# and the fallback read below can never drift apart.
QUESTIONS_CSV = r'C:\Rajveer\AI\AI Project\AI Project\Data\Questions.csv'
MAX_ROWS = 50000

try:
    data = pd.read_csv(QUESTIONS_CSV, encoding='utf-8', nrows=MAX_ROWS)
except UnicodeDecodeError:
    # The file is not valid UTF-8: retry with latin-1 as a permissive fallback.
    data = pd.read_csv(QUESTIONS_CSV, encoding='latin-1', nrows=MAX_ROWS)
data

Unnamed: 0,Id,OwnerUserId,CreationDate,ClosedDate,Score,Title,Body
0,80,26.0,2008-08-01T13:57:07Z,,26,SQLStatement.execute() - multiple queries in o...,<p>I've written a database generation script i...
1,90,58.0,2008-08-01T14:41:24Z,2012-12-26T03:45:49Z,144,Good branching and merging tutorials for Torto...,<p>Are there any really good tutorials explain...
2,120,83.0,2008-08-01T15:50:08Z,,21,ASP.NET Site Maps,<p>Has anyone got experience creating <strong>...
3,180,2089740.0,2008-08-01T18:42:19Z,,53,Function for creating color wheels,<p>This is something I've pseudo-solved many t...
4,260,91.0,2008-08-01T23:22:08Z,,49,Adding scripting functionality to .NET applica...,<p>I have a little game written in C#. It uses...
...,...,...,...,...,...,...,...
49995,2372290,271362.0,2010-03-03T15:01:30Z,,3,Hot to get random date in range,<p>I have two dates as strings (dd-mm-yyyy).\n...
49996,2372300,88092.0,2010-03-03T15:03:22Z,,1,Run Silverlight Project...doesn't point to web...,<p>I've created a Silverlight app and VS set u...
49997,2372320,282230.0,2010-03-03T15:05:59Z,,0,Server multithreading overkill?,<p>I'm creating a server-type application at t...
49998,2372330,285409.0,2010-03-03T15:06:53Z,,1,extjs tree using cntl x and v,<p>I have a client that is requesting to be ab...


In [31]:
# Preprocess data and extract features
# Drop only the rows that are missing a field the pipeline actually reads
# (Body, CreationDate, Score). A blanket dropna() would also discard every
# row with an empty ClosedDate -- which is most of the rows in the preview
# above -- and shrink the training set to a small, unrepresentative subset.
REQUIRED_COLUMNS = ['Body', 'CreationDate', 'Score']
data.dropna(subset=REQUIRED_COLUMNS, inplace=True)


In [32]:
# Extract features
def _polarity(body):
    """Return TextBlob's sentiment polarity for a single question body."""
    return TextBlob(body).sentiment.polarity

# Parse the creation timestamps once and derive both time features from them.
created_at = pd.to_datetime(data['CreationDate'])

data['text_length'] = data['Body'].map(len)       # number of characters in the body
data['sentiment'] = data['Body'].map(_polarity)   # TextBlob polarity of the body
data['hour_of_day'] = created_at.dt.hour
data['day_of_week'] = created_at.dt.dayofweek


In [33]:
# Define target variable (score_category)
# Two right-closed bins: scores up to and including 10 are labelled 'Low',
# anything above 10 is labelled 'High'.
score_bins = [-float('inf'), 10, float('inf')]
score_labels = ['Low', 'High']
data['score_category'] = pd.cut(data['Score'], bins=score_bins, labels=score_labels)

In [34]:
# Select features and target variable
feature_columns = ['text_length', 'sentiment', 'hour_of_day', 'day_of_week']
X = data[feature_columns]     # model inputs, one column per engineered feature
y = data['score_category']    # class label the model learns to predict


In [35]:
# Train a Random Forest classifier
# Hyper-parameters are gathered in one place; random_state is pinned so that
# repeated runs build the same forest.
forest_settings = {'n_estimators': 100, 'random_state': 42}
model = RandomForestClassifier(**forest_settings)
model.fit(X, y)

In [36]:
# Define a function to predict score_category for a given question text
def predict_score_category(question_text, creation_date):
    """Predict the score category of a single question.

    Computes the same four features the classifier is trained on
    (text length, TextBlob sentiment polarity, hour of day, day of week)
    and feeds them to the notebook-level ``model``.

    Parameters
    ----------
    question_text : str
        Text of the question.
    creation_date : str or datetime-like
        Creation timestamp; anything ``pd.to_datetime`` can parse.

    Returns
    -------
    The label predicted by ``model`` for this question (one of the
    ``score_category`` labels the model was fitted on).

    Notes
    -----
    Relies on the global ``model`` having been fitted in an earlier cell.
    """
    # Parse the timestamp once and reuse it for both time-based features.
    created_at = pd.to_datetime(creation_date)

    # The model is fitted on a DataFrame, so predict with a DataFrame that
    # carries the same column names in the same order. Passing a bare nested
    # list makes scikit-learn warn about missing feature names and removes
    # its ability to check that the columns line up.
    features = pd.DataFrame(
        [{
            'text_length': len(question_text),
            'sentiment': TextBlob(question_text).sentiment.polarity,
            'hour_of_day': created_at.hour,
            'day_of_week': created_at.dayofweek,
        }],
        columns=['text_length', 'sentiment', 'hour_of_day', 'day_of_week'],
    )
    return model.predict(features)[0]

In [37]:

# Test the function with a sample question text and creation_date
# Inputs are passed by keyword so the text/date pairing is explicit at the call site.
sample_question_text = "Please Id like to know how to get pvalue for predictors of quantile regression? Please could You provide a code or explain me how get that for quantile regression?"
sample_creation_date = "2023-04-7T12:30:00"

predicted_category = predict_score_category(
    question_text=sample_question_text,
    creation_date=sample_creation_date,
)
print(f"Predicted Score Category: {predicted_category}")


Predicted Score Category: Low


