In [1]:
from transformers import pipeline

# Build a Hugging Face "sentiment-analysis" pipeline backed by the checkpoint
# named below. Constructing the pipeline loads the model; the recorded cell
# output shows it was placed on the CPU.
# NOTE(review): this checkpoint reports generic labels (e.g. 'LABEL_1' in the
# output further down). The model card reportedly maps LABEL_0/1/2 to
# negative/neutral/positive -- confirm before interpreting results.
model_name = "cardiffnlp/twitter-roberta-base-sentiment"
sentiment_pipeline = pipeline("sentiment-analysis", model=model_name)

  from .autonotebook import tqdm as notebook_tqdm
Device set to use cpu


In [4]:
# Classify one example sentence. As the printed output shows, the pipeline
# returns a list with one dict per input, each holding a 'label' and a 'score'.
result = sentiment_pipeline("I've been waiting for a Hugging Face course my whole life.")
print(result)

[{'label': 'LABEL_1', 'score': 0.6970634460449219}]


In [5]:
# Show only the predicted label string of the first prediction in `result`.
print(result[0]['label'])

LABEL_1


In [None]:
import pandas as pd
import numpy as np
import joblib
import io
from pydantic import BaseModel

# Import all preprocessing functions
from scripts.preprocessing import (
    handle_missing_values,
    drop_columns,
    filter_outliers,
    feature_encoding,
    feature_scaling
)

"""
This is a simple FastAPI app that loads a saved model (from my previous data science project) and uses it to make predictions.
"""

# Define request schema
class InputData(BaseModel):
    """Request schema for a single prediction input.

    Holds four numeric fields and five string fields. Field names use
    underscores so they are valid Python identifiers; ``preprocess_data``
    renames the multi-word ones to space-separated column names before
    running the preprocessing helpers.
    """

    Daily_Time_Spent_on_Site: float
    Age: float
    Area_Income: float
    Daily_Internet_Usage: float
    Gender: str
    # Declared as a plain string: no date parsing or validation happens here.
    Visit_Date: str
    City: str
    Province: str
    Category: str

    # Pydantic v2 configuration. The nested ``class Config`` form is deprecated
    # in v2 and emits a deprecation warning; ``model_config`` carries the same
    # settings. A plain dict is used so no extra import is required.
    model_config = {
        "populate_by_name": True,
        # Example payload attached to the generated JSON schema.
        "json_schema_extra": {
            "example": {
                "Daily_Time_Spent_on_Site": 68.95,
                "Age": 35,
                "Area_Income": 61833.90,
                "Daily_Internet_Usage": 256.09,
                "Gender": "Male",
                "Visit_Date": "2023-01-15",
                "City": "New York",
                "Province": "NY",
                "Category": "Technology"
            }
        },
    }

In [None]:
# Preprocessing function
def preprocess_data(input_data: InputData):
    """
    Run one request payload through the preprocessing pipeline.

    Steps, in order: rename columns, drop ``Visit Time`` if present, fill
    missing values, filter outliers on ``Area Income``, encode features and
    scale features. Every step is best-effort: if a helper raises, the error
    is printed and the pipeline continues with the frame as it stood before
    that step.

    Args:
        input_data: Request payload; converted to a one-row DataFrame via
            ``model_dump()``.

    Returns:
        The frame produced by the last step that succeeded. Because failures
        are only printed, the result may be partially processed.

    Note:
        Reads a notebook-level ``ori_df_preprocessed`` object that is not
        defined in this cell; it must exist before this function is called.
    """

    # Convert input data to a one-row DataFrame
    input_df = pd.DataFrame([input_data.model_dump()])

    # Print the input data columns for debugging
    print(f"Original input columns: {input_df.columns}")

    # The request schema uses identifier-safe names; the steps below address
    # these columns by their space-separated names.
    column_mapping = {
        'Daily_Time_Spent_on_Site': 'Daily Time Spent on Site',
        'Area_Income': 'Area Income',
        'Daily_Internet_Usage': 'Daily Internet Usage',
        'Visit_Date': 'Visit Date'
    }
    input_df = input_df.rename(columns=column_mapping)
    print(f"Renamed columns: {input_df.columns}")

    # 1. Drop unnecessary columns
    try:
        if 'Visit Time' in input_df.columns:
            input_df = drop_columns(input_df, columns=['Visit Time'])
    except Exception as e:
        print(f"Error in dropping columns: {str(e)}")

    # 2. Handle missing values
    # The first call treats the helper as frame-in / frame-out, so every call
    # rebinds the whole frame. Assigning its result to a single column (as the
    # previous version did for 'Area Income' and 'Gender') fails when a full
    # frame comes back, and the error handler then skipped the 'Gender' fill.
    # NOTE(review): assumes the helper returns the full frame for a
    # single-column request too -- confirm against scripts.preprocessing.
    # NOTE(review): the frame has exactly one row here, so a mean/median/mode
    # computed from it alone cannot fill a missing value -- confirm the helper
    # draws on training statistics.
    try:
        input_df = handle_missing_values(
            input_df,
            columns=['Daily Time Spent on Site', 'Daily Internet Usage'],
            strategy='fill',
            imputation_method='mean',
        )
        input_df = handle_missing_values(
            input_df, columns=['Area Income'], strategy='fill', imputation_method='median'
        )
        input_df = handle_missing_values(
            input_df, columns=['Gender'], strategy='fill', imputation_method='mode'
        )
    except Exception as e:
        print(f"Error in handling missing values: {str(e)}")

    # 3. Handle outliers
    # NOTE(review): unlike encoding and scaling, this call receives no
    # reference data; IQR bounds derived from a single row are degenerate --
    # confirm the intended behaviour at inference time.
    try:
        input_df = filter_outliers(input_df, col_series=['Area Income'], method='iqr')
    except Exception as e:
        print(f"Error in filtering outliers: {str(e)}")

    # 4. Feature encoding
    try:
        # The second return value is not used by this function.
        input_df, _expected_columns = feature_encoding(input_df, original_data=ori_df_preprocessed)
    except Exception as e:
        print(f"Error in feature encoding: {str(e)}")
        # input_df is always bound at this point, so print it unconditionally.
        print(f"Input data columns: {input_df.columns}")
        print(f"Original data columns: {ori_df_preprocessed.columns}")

    # 5. Feature scaling
    try:
        input_df = feature_scaling(input_df, original_data=ori_df_preprocessed)
    except Exception as e:
        print(f"Error in feature scaling: {str(e)}")

    # Return the preprocessed data
    return input_df