In [None]:
# This notebook trains a model that predicts the stock quantity needed in a given month for a specific country and stock code.

In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
# Load the raw sales export into a DataFrame, decoding the file as latin1.
# NOTE(review): the path is absolute and machine-specific, so this cell only
# runs where F:\Sales_Data.csv exists.
Sales_Data = pd.read_csv(
    r"F:\Sales_Data.csv",
    encoding='latin1',
)

In [3]:
# Show the loaded frame; the rendered output is truncated to the first and last rows.
Sales_Data

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...,...
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,12/9/2011 12:50,0.85,12680.0,France
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,12/9/2011 12:50,2.10,12680.0,France
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,12/9/2011 12:50,4.15,12680.0,France
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,12/9/2011 12:50,4.15,12680.0,France


In [4]:
# Parse the invoice timestamps, then derive the calendar month number from them.
# NOTE(review): Month carries no year, so rows from December 2010 and
# December 2011 both end up with Month == 12.
Sales_Data = Sales_Data.assign(
    InvoiceDate=lambda frame: pd.to_datetime(frame['InvoiceDate']),
    Month=lambda frame: frame['InvoiceDate'].dt.month,
)

In [5]:
# Show the frame again to confirm InvoiceDate is now a timestamp and Month was added.
Sales_Data

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,Month
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom,12
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,12
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom,12
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,12
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,12
...,...,...,...,...,...,...,...,...,...
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,2011-12-09 12:50:00,0.85,12680.0,France,12
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,2011-12-09 12:50:00,2.10,12680.0,France,12
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,2011-12-09 12:50:00,4.15,12680.0,France,12
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,2011-12-09 12:50:00,4.15,12680.0,France,12


In [6]:
# Sum Quantity for every (Country, StockCode, Month) combination; the group
# keys stay as ordinary columns so the result is a flat table, one row per group.
aggregated_data = Sales_Data.groupby(
    ['Country', 'StockCode', 'Month'],
    as_index=False,
)['Quantity'].sum()

In [7]:
# Preview the first five aggregated rows.
aggregated_data.head()

Unnamed: 0,Country,StockCode,Month,Quantity
0,Australia,15036,5,600
1,Australia,15056BL,5,3
2,Australia,16161P,6,400
3,Australia,16169E,10,25
4,Australia,20665,3,6


In [8]:
# Model inputs: the three grouping keys.
X = aggregated_data.loc[:, ['Country', 'StockCode', 'Month']]
# Model target: the summed quantity for each key combination.
y = aggregated_data.loc[:, 'Quantity']

In [9]:
# One-hot encode the three categorical features (Month is treated as a
# category, not a number). Encoding straight from aggregated_data instead of
# from X itself keeps this cell re-runnable: once X has been encoded it no
# longer has the raw Country/StockCode/Month columns, so encoding X a second
# time raises a KeyError.
X = pd.get_dummies(
    aggregated_data[['Country', 'StockCode', 'Month']],
    columns=['Country', 'StockCode', 'Month'],
)

In [10]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Create a linear regression estimator and fit it on the training split.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [12]:
# Predict quantities for the held-out test rows.
# NOTE(review): y_pred is not referenced by any later cell shown in this notebook.
y_pred = model.predict(X=X_test)

In [20]:
# Distinct country names found in the aggregated data; used to reject
# prediction requests for countries the data does not contain.
valid_countries = aggregated_data.loc[:, 'Country'].unique()

# Example prediction function
def predict_quantity(country, stock_code, month):
    """Predict the total quantity of one stock item for a country and month.

    The request is one-hot encoded the same way as the training features,
    aligned to ``X.columns``, and passed to the fitted ``model``.

    Parameters
    ----------
    country : str
        Country name; must be one of ``valid_countries``.
    stock_code : str
        Stock code; must have a ``StockCode_<stock_code>`` column in ``X``.
    month : int
        Month number; must have a ``Month_<month>`` column in ``X``.

    Returns
    -------
    float or None
        The model's prediction, or ``None`` (after printing a message) when
        the country, stock code, or month does not appear in the data the
        feature matrix was built from.
    """
    if country not in valid_countries:
        print(f"Sorry, we do not have data for the country: {country}.")
        return None

    # get_dummies names its columns "<prefix>_<value>". A value with no such
    # column in X was never seen; reindex() below would silently drop it and
    # the model would return a number that ignores that input, so refuse
    # instead, mirroring the country check above.
    for label, value, column in (
        ('stock code', stock_code, f'StockCode_{stock_code}'),
        ('month', month, f'Month_{month}'),
    ):
        if column not in X.columns:
            print(f"Sorry, we do not have data for the {label}: {value}.")
            return None

    input_data = pd.DataFrame({
        'Country': [country],
        'StockCode': [stock_code],
        'Month': [month]
    })

    # Convert categorical variables to numeric
    input_data = pd.get_dummies(input_data, columns=['Country', 'StockCode', 'Month'])

    # Add every training column the single row lacks (filled with 0) and put
    # the columns in the same order the model was fitted with.
    input_data = input_data.reindex(columns=X.columns, fill_value=0)

    # predict() returns one value per input row; there is exactly one row.
    return model.predict(input_data)[0]

In [21]:
# Example usage: predicted quantity of stock code 85123A for the United Kingdom in month 12
print(predict_quantity('United Kingdom', '85123A', 12))

628.7109375


In [22]:
# A country that is not in the dataset produces a message and None instead of a prediction.
print(predict_quantity('India', '85123A', 12))

Sorry, we do not have data for the country: India.
None
