<a href="https://colab.research.google.com/github/siva-Parvathi-M/Siva/blob/main/ML_Operations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
import pandas as pd

# Attach Google Drive to the Colab runtime so files under it can be read.
drive.mount('/content/drive')

# Path of the Online Retail workbook on Drive; edit it if the file is stored elsewhere.
file_path = "/content/drive/MyDrive/Colab Notebooks/Source Data/Online Retail.xlsx"

# Read the workbook into the DataFrame that the following cells work on.
df = pd.read_excel(io=file_path)

# Preview the first five rows as this cell's output.
df.head(n=5)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


**Data Preprocessing**

In [3]:
# Report how many missing values each column has before any rows are removed.
print(df.isnull().sum())

# Build the cleaned frame in one non-mutating chain and rebind `df` to it:
#   1. drop every row that has a missing value in any column,
#   2. make sure InvoiceDate is a datetime column,
#   3. append TotalPrice = Quantity * UnitPrice as the last column.
# Using dropna() + assign() instead of inplace=True and item assignment means
# `df` is only replaced once the whole transformation has succeeded, so a
# failure part-way through cannot leave a half-processed frame behind.
df = (
    df.dropna()
    .assign(
        InvoiceDate=lambda frame: pd.to_datetime(frame['InvoiceDate']),
        TotalPrice=lambda frame: frame['Quantity'] * frame['UnitPrice'],
    )
)

# Preview the cleaned frame, now including the TotalPrice column.
df.head()

InvoiceNo           0
StockCode           0
Description      1454
Quantity            0
InvoiceDate         0
UnitPrice           0
CustomerID     135080
Country             0
dtype: int64


Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,TotalPrice
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom,15.3
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom,22.0
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34


**Feature Selection & Train-Test Split**

In [4]:
from sklearn.model_selection import train_test_split

# Columns used as model inputs, and the column the model should predict.
features = ['Quantity', 'UnitPrice']
target = 'TotalPrice'

# NOTE(review): TotalPrice is created in the preprocessing cell as
# Quantity * UnitPrice, so the target is an exact product of the two features.
# Confirm that predicting it with a model is the intended exercise.
X, y = df[features], df[target]

# Keep 20% of the rows aside for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

**Train a Machine Learning Model**

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Create a linear regression model and fit it on the training split
# (fit() returns the estimator itself, so the fitted model is bound directly).
model = LinearRegression().fit(X_train, y_train)

# Predict the target for the held-out rows.
y_pred = model.predict(X_test)

# Score the predictions against the true test targets and report both errors.
mae, mse = mean_absolute_error(y_test, y_pred), mean_squared_error(y_test, y_pred)
print(f"MAE: {mae}, MSE: {mse}")

MAE: 15.924114423604436, MSE: 6875.822210958749


**Save & Load the Model for Automation**

In [6]:
import joblib

# Destination of the serialized model on Google Drive.
# NOTE(review): this path spells the folder "My Drive" (with a space) while the
# dataset path in the loading cell uses "MyDrive" -- confirm both resolve to the
# same Drive location in the runtime being used.
model_path = "/content/drive/My Drive/retail_model.pkl"

# Persist the fitted model to disk.
joblib.dump(value=model, filename=model_path)

# Read it straight back as a round-trip check that the file is usable.
loaded_model = joblib.load(filename=model_path)
print("Model Loaded Successfully!")

Model Loaded Successfully!


**Automate Prediction (Batch Inference)**

In [7]:
# Two example rows standing in for a new batch; they carry the same
# 'Quantity' and 'UnitPrice' columns the model was trained on.
new_data = pd.DataFrame(
    [
        {'Quantity': 5, 'UnitPrice': 2.5},
        {'Quantity': 10, 'UnitPrice': 5.0},
    ]
)

# Run the reloaded model on the batch and show the predicted total prices.
new_predictions = loaded_model.predict(new_data)
print(new_predictions)

[10.12626835 15.81841686]
