In [1]:
import pandas as pd
import numpy as np
import random
from sklearn.preprocessing import StandardScaler


# Set parameters
dates = pd.date_range(start='2024-01-01', periods=365, freq='D')
crops = ['Wheat', 'Rice', 'Corn']
grades = ['A', 'B', 'C']
weather_conditions = ['Clear', 'Cloudy', 'Rainy', 'Sunny']

# Generate random data
data = {
    'Date': np.tile(dates, len(crops)),
    'Crop Type': np.repeat(crops, len(dates)),
    'Grade': [random.choice(grades) for _ in range(len(dates) * len(crops))],
    'Market Price': np.random.uniform(100, 300, len(dates) * len(crops)),
    'Demand': np.random.randint(500, 1500, len(dates) * len(crops)),
    'Supply': np.random.randint(400, 1400, len(dates) * len(crops)),
    'Weather Condition': [random.choice(weather_conditions) for _ in range(len(dates) * len(crops))],
    'Rainfall (mm)': np.random.uniform(0, 20, len(dates) * len(crops)),
    'Temperature (°C)': np.random.uniform(15, 35, len(dates) * len(crops)),
    'Distance to Market (km)': np.random.uniform(10, 50, len(dates) * len(crops)),
    'Transport Cost': np.random.uniform(20, 100, len(dates) * len(crops)),
    'Peak Season': [1 if month in [3, 4, 10, 11] else 0 for month in pd.DatetimeIndex(dates).month.tolist() * len(crops)]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('market_supply_chain_data.csv', index=False)

print("Sample dataset created and saved as 'market_supply_chain_data.csv'")


Sample dataset created and saved as 'market_supply_chain_data.csv'


In [2]:
df.head()

Unnamed: 0,Date,Crop Type,Grade,Market Price,Demand,Supply,Weather Condition,Rainfall (mm),Temperature (°C),Distance to Market (km),Transport Cost,Peak Season
0,2024-01-01,Wheat,B,255.125107,1245,800,Clear,2.114994,33.125984,35.290827,50.891847,0
1,2024-01-02,Wheat,C,234.188679,1395,790,Clear,18.451649,23.335894,31.407378,51.455996,0
2,2024-01-03,Wheat,A,228.400461,527,448,Cloudy,14.808867,32.565693,42.037657,53.030315,0
3,2024-01-04,Wheat,C,275.60285,1273,804,Cloudy,18.680887,15.1823,40.213947,76.798912,0
4,2024-01-05,Wheat,B,284.523977,1152,1243,Clear,16.759093,31.15008,22.689841,96.430654,0


In [3]:
# Handle missing values
df.fillna(method='ffill', inplace=True)

# Feature Scaling
scaler = StandardScaler()
df[['Market Price', 'Demand', 'Supply', 'Rainfall (mm)', 'Temperature (°C)', 'Distance to Market (km)', 'Transport Cost (USD)']] = scaler.fit_transform(
    df[['Market Price', 'Demand', 'Supply', 'Rainfall (mm)', 'Temperature (°C)', 'Distance to Market (km)', 'Transport Cost']])

# Add additional time-based features
df['Month'] = pd.to_datetime(df['Date']).dt.month


In [4]:
df.tail()

Unnamed: 0,Date,Crop Type,Grade,Market Price,Demand,Supply,Weather Condition,Rainfall (mm),Temperature (°C),Distance to Market (km),Transport Cost,Peak Season,Transport Cost (USD),Month
1090,2024-12-26,Corn,B,0.467531,-1.145507,-0.114746,Rainy,-0.133201,-0.733631,-0.098835,77.231408,0,0.718244,12
1091,2024-12-27,Corn,B,-0.4297,-1.287043,0.496371,Sunny,-1.270537,1.593023,0.464497,20.663438,0,-1.689737,12
1092,2024-12-28,Corn,C,0.112474,-0.082261,-0.732807,Sunny,-1.685222,0.520109,-1.74759,81.966485,0,0.919806,12
1093,2024-12-29,Corn,B,0.67392,-1.452744,-1.298785,Rainy,1.645681,0.468157,0.479483,85.822699,0,1.083957,12
1094,2024-12-30,Corn,C,1.461451,0.155934,1.558881,Rainy,0.900998,-1.535212,-1.743517,21.263348,0,-1.6642,12


In [5]:
print("The shape of the dataset:-",(df.shape))

The shape of the dataset:- (1095, 14)


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1095 entries, 0 to 1094
Data columns (total 14 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Date                     1095 non-null   datetime64[ns]
 1   Crop Type                1095 non-null   object        
 2   Grade                    1095 non-null   object        
 3   Market Price             1095 non-null   float64       
 4   Demand                   1095 non-null   float64       
 5   Supply                   1095 non-null   float64       
 6   Weather Condition        1095 non-null   object        
 7   Rainfall (mm)            1095 non-null   float64       
 8   Temperature (°C)         1095 non-null   float64       
 9   Distance to Market (km)  1095 non-null   float64       
 10  Transport Cost           1095 non-null   float64       
 11  Peak Season              1095 non-null   int64         
 12  Transport Cost (USD)     1095 non-

In [7]:
df.describe()

Unnamed: 0,Market Price,Demand,Supply,Rainfall (mm),Temperature (°C),Distance to Market (km),Transport Cost,Peak Season,Transport Cost (USD),Month
count,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0,1095.0
mean,3.62977e-17,7.137872000000001e-17,9.733462e-18,-4.734924e-17,-3.305322e-17,-2.516506e-16,60.358515,0.334247,-2.190029e-16,6.49863
std,1.000457,1.000457,1.000457,1.000457,1.000457,1.000457,23.502603,0.471942,1.000457,3.445548
min,-1.732919,-1.728911,-1.746706,-1.749576,-1.730767,-1.772897,20.081498,0.0,-1.714509,1.0
25%,-0.8071013,-0.8520785,-0.8890589,-0.8550168,-0.8744506,-0.8667991,39.909523,0.0,-0.8704711,4.0
50%,-0.03641749,0.004041634,-0.04182878,-0.009200729,-0.01463681,0.05820384,59.447265,0.0,-0.03879001,7.0
75%,0.8496745,0.8843265,0.8679019,0.8445593,0.8895616,0.8097817,81.460319,1.0,0.8982599,9.0
max,1.753176,1.71283,1.715132,1.761044,1.700838,1.759088,99.973762,1.0,1.686339,12.0


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Prepare features and target variable
X = df[['Demand', 'Supply', 'Rainfall (mm)', 'Temperature (°C)', 'Distance to Market (km)', 'Peak Season']]
y = df['Market Price']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

print("Mean Squared Error:", mse)


Mean Squared Error: 1.147537666228753
