# Loading the data

In [1]:
import pandas as pd

# Read the meat dataset from a CSV path resolved against the working directory
csv_path = "global_meat.csv"
data = pd.read_csv(csv_path)

# Preview the first rows of the loaded frame
data.head()


Unnamed: 0,Country,Code,Year,Meat_total
0,Bahamas,BHS,1961,126.7
1,Brunei,BRN,1961,1289.8
2,Qatar,QAT,1961,1769.2
3,Faroe Islands,FRO,1961,
4,Tuvalu,TUV,1961,3.0


In [2]:

# Display information about the dataset before preprocessing.
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
print("Information about the dataset before preprocessing:")
data.info()

Information about the dataset before preprocessing:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14382 entries, 0 to 14381
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Country     14382 non-null  object 
 1   Code        11719 non-null  object 
 2   Year        14382 non-null  int64  
 3   Meat_total  14377 non-null  float64
dtypes: float64(1), int64(1), object(2)
memory usage: 449.6+ KB
None


# Handling Missing Values (Imputation)

In [3]:
# Step 2: Handling Missing Values (Imputation)
from sklearn.impute import SimpleImputer

In [4]:
# Replace the missing 'Meat_total' entries with the mean of that column
imputer = SimpleImputer(strategy='mean')
meat_total_filled = imputer.fit_transform(data[['Meat_total']])
data['Meat_total'] = meat_total_filled

In [5]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

In [6]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode 'Country' and 'Code' columns
categorical_cols = ['Country', 'Code']

# The keyword that requests a dense array was renamed from `sparse` to
# `sparse_output` in scikit-learn; try the current spelling first and fall back
# to the old one so the cell runs on either side of the rename.
try:
    encoder = OneHotEncoder(sparse_output=False, drop='first')
except TypeError:
    encoder = OneHotEncoder(sparse=False, drop='first')

encoded_cols = encoder.fit_transform(data[categorical_cols])
encoded_df = pd.DataFrame(
    encoded_cols,
    columns=encoder.get_feature_names_out(categorical_cols),
    index=data.index,  # keep rows aligned with `data` for the concat below
)

# `data` itself is left untouched; the encoded result lives in `data_encoded`
data_encoded = pd.concat([data.drop(columns=categorical_cols), encoded_df], axis=1)

# Display the first few rows after encoding
data_encoded.head()


Unnamed: 0,Year,Meat_total,Country_Africa,Country_Africa (FAO),Country_Albania,Country_Algeria,Country_Americas (FAO),Country_Angola,Country_Antigua and Barbuda,Country_Argentina,...,Code_VCT,Code_VEN,Code_VNM,Code_VUT,Code_WSM,Code_YEM,Code_ZAF,Code_ZMB,Code_ZWE,Code_nan
0,1961,126.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1961,1289.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1961,1769.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1961,1697049.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1961,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Check the columns of `data`: the one-hot result was stored in `data_encoded`,
# so `data` itself is expected to still hold only the original columns.
data.columns

Index(['Country', 'Code', 'Year', 'Meat_total'], dtype='object')

# Label Encoding

In [8]:
from sklearn.preprocessing import LabelEncoder

# Label encode 'Country' and 'Code' columns.
# Each column gets its own fitted encoder: refitting one shared LabelEncoder on
# 'Code' would discard the classes learned for 'Country', so those integer
# labels could no longer be mapped back with inverse_transform().
label_encoders = {}
for col in ['Country', 'Code']:
    label_encoders[col] = LabelEncoder()
    data[f'{col}_LabelEncoded'] = label_encoders[col].fit_transform(data[col])

# Backward-compatible alias: as before, `label_encoder` ends up fitted on 'Code'
label_encoder = label_encoders['Code']

# Display the first few rows after label encoding
data.head()


Unnamed: 0,Country,Code,Year,Meat_total,Country_LabelEncoded,Code_LabelEncoded
0,Bahamas,BHS,1961,126.7,15,17
1,Brunei,BRN,1961,1289.8,29,24
2,Qatar,QAT,1961,1769.2,183,159
3,Faroe Islands,FRO,1961,1697049.0,78,59
4,Tuvalu,TUV,1961,3.0,233,193


# Standardization

In [9]:
from sklearn.preprocessing import StandardScaler

# Columns to standardize with StandardScaler
numeric_cols = ['Year', 'Meat_total']

# Fit on the two columns and write the scaled values back over the originals
scaler = StandardScaler()
standardized_values = scaler.fit_transform(data[numeric_cols])
data[numeric_cols] = standardized_values

# Preview the frame after standardization
data.head()


Unnamed: 0,Country,Code,Year,Meat_total,Country_LabelEncoded,Code_LabelEncoded
0,Bahamas,BHS,0.853025,-0.251293,15,17
1,Brunei,BRN,0.853025,-0.25112,29,24
2,Qatar,QAT,0.853025,-0.251049,183,159
3,Faroe Islands,FRO,0.853025,0.0,78,59
4,Tuvalu,TUV,0.853025,-0.251311,233,193


# Min-Max Scaling

In [10]:
from sklearn.preprocessing import MinMaxScaler

# Rescale 'Year' and 'Meat_total' with a default MinMaxScaler.
# NOTE(review): the Standardization cell above already overwrote these two
# columns in place, so the values scaled here are the standardized ones.
minmax_cols = ['Year', 'Meat_total']
min_max_scaler = MinMaxScaler().fit(data[minmax_cols])
data[minmax_cols] = min_max_scaler.transform(data[minmax_cols])

# Preview the frame after Min-Max scaling
data.head()


Unnamed: 0,Country,Code,Year,Meat_total,Country_LabelEncoded,Code_LabelEncoded
0,Bahamas,BHS,0.980971,9.857461e-07,15,17
1,Brunei,BRN,0.980971,1.010684e-05,29,24
2,Qatar,QAT,0.980971,1.386632e-05,183,159
3,Faroe Islands,FRO,0.980971,0.01330834,78,59
4,Tuvalu,TUV,0.980971,1.568411e-08,233,193


# Importing Necessary Libraries and Splitting the Data

In [11]:
from sklearn.model_selection import train_test_split

# Separate the target column from the remaining columns
target_col = 'Meat_total'
y = data[target_col]
X = data.drop(columns=[target_col])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Report the size of each part
print("Training set shape:", X_train.shape, y_train.shape)
print("Testing set shape:", X_test.shape, y_test.shape)

# Peek at the first rows of the training features
print("\nFirst few rows of the training set:")
print(X_train.head())

# Peek at the first rows of the testing features
print("\nFirst few rows of the testing set:")
print(X_test.head())


Training set shape: (11505, 5) (11505,)
Testing set shape: (2877, 5) (2877,)

First few rows of the training set:
                             Country Code      Year  Country_LabelEncoded  \
2980             Southern Asia (FAO)  NaN  0.987481                   212   
11625                          Haiti  HTI  0.104657                    96   
13610                          Gabon  GAB  0.108663                    84   
12174          Northern Africa (FAO)  NaN  0.105658                   165   
13269  Upper-middle-income countries  NaN  0.108162                   240   

       Code_LabelEncoded  
2980                 210  
11625                 78  
13610                 61  
12174                210  
13269                210  

First few rows of the testing set:
                            Country Code      Year  Country_LabelEncoded  \
12417                          Chad  TCD  0.106159                    41   
4394            Middle Africa (FAO)  NaN  0.098147                   144 

In [12]:
# List the feature columns currently held in X (label line, then the Index)
print("Columns in X:", X.columns, sep="\n")


Columns in X:
Index(['Country', 'Code', 'Year', 'Country_LabelEncoded', 'Code_LabelEncoded'], dtype='object')


In [13]:
from sklearn.model_selection import train_test_split

# Target is 'Meat_total'; every other column of `data` is a feature.
# This repeats the statements of the earlier split cell with the same seed and
# the same test fraction.
y = data['Meat_total']
X = data.drop(columns='Meat_total')

# 80% train / 20% test
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Sizes of the two parts
print("Training set shape:", X_train.shape, y_train.shape)
print("Testing set shape:", X_test.shape, y_test.shape)

# First rows of each feature part (heading line, then the frame)
print("\nFirst few rows of the training set:", X_train.head(), sep="\n")
print("\nFirst few rows of the testing set:", X_test.head(), sep="\n")


Training set shape: (11505, 5) (11505,)
Testing set shape: (2877, 5) (2877,)

First few rows of the training set:
                             Country Code      Year  Country_LabelEncoded  \
2980             Southern Asia (FAO)  NaN  0.987481                   212   
11625                          Haiti  HTI  0.104657                    96   
13610                          Gabon  GAB  0.108663                    84   
12174          Northern Africa (FAO)  NaN  0.105658                   165   
13269  Upper-middle-income countries  NaN  0.108162                   240   

       Code_LabelEncoded  
2980                 210  
11625                 78  
13610                 61  
12174                210  
13269                210  

First few rows of the testing set:
                            Country Code      Year  Country_LabelEncoded  \
12417                          Chad  TCD  0.106159                    41   
4394            Middle Africa (FAO)  NaN  0.098147                   144 

In [14]:
from sklearn.preprocessing import LabelEncoder

# Raw text columns that are replaced by their integer label codes in X
raw_text_cols = ['Country', 'Code']

# Re-apply label encoding only while a raw column is still present. Once the
# raw columns have been dropped (i.e. this cell already ran), the loop is a
# no-op instead of raising KeyError, so the cell is safe to re-run.
for col in raw_text_cols:
    if col in X.columns:
        # a fresh encoder per column; nothing is shared between the two fits
        X[f'{col}_LabelEncoded'] = LabelEncoder().fit_transform(X[col])

# Drop the original 'Country' and 'Code' columns (ignored if already gone)
X = X.drop(columns=raw_text_cols, errors='ignore')

# Display the first few rows of the dataset to verify the changes
print(X.head())


       Year  Country_LabelEncoded  Code_LabelEncoded
0  0.980971                    15                 17
1  0.980971                    29                 24
2  0.980971                   183                159
3  0.980971                    78                 59
4  0.980971                   233                193


In [15]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Raw text columns that are replaced by their integer label codes in `data`
raw_text_cols = ['Country', 'Code']

# Re-apply label encoding only while a raw column is still present. After the
# first run the raw columns are gone from `data`; the guard turns a re-run into
# a no-op instead of a KeyError.
for col in raw_text_cols:
    if col in data.columns:
        # a fresh encoder per column; nothing is shared between the two fits
        data[f'{col}_LabelEncoded'] = LabelEncoder().fit_transform(data[col])

# Drop the original 'Country' and 'Code' columns (ignored if already gone)
data = data.drop(columns=raw_text_cols, errors='ignore')

# Split the data into features (X) and target variable (y)
X = data.drop(['Meat_total'], axis=1)
y = data['Meat_total']

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Display the first few rows of the training set to verify the changes
print(X_train.head())



           Year  Country_LabelEncoded  Code_LabelEncoded
2980   0.987481                   212                210
11625  0.104657                    96                 78
13610  0.108663                    84                 61
12174  0.105658                   165                210
13269  0.108162                   240                210


# Linear Regression

In [16]:
from sklearn.linear_model import LinearRegression

# Fit a default LinearRegression on the training part (fit() returns the model)
linear_reg = LinearRegression().fit(X_train, y_train)

# R^2 on the training part first, then on the testing part
train_score_linear_reg, test_score_linear_reg = (
    linear_reg.score(features, target)
    for features, target in ((X_train, y_train), (X_test, y_test))
)

# Report both scores, one per line
print(
    "Linear Regression:",
    f"  Training R^2 score: {train_score_linear_reg:.4f}",
    f"  Testing R^2 score: {test_score_linear_reg:.4f}",
    sep="\n",
)


Linear Regression:
  Training R^2 score: 0.1003
  Testing R^2 score: 0.0808


# Decision Tree Regressor 

In [17]:
from sklearn.tree import DecisionTreeRegressor

# Create and train the Decision Tree Regressor model.
# random_state is fixed (same seed as the train/test split) so that re-running
# the notebook rebuilds the same tree and reports the same scores.
dt_reg = DecisionTreeRegressor(random_state=42)
dt_reg.fit(X_train, y_train)

# Calculate training and testing scores (R^2 on each part)
train_score_dt_reg = dt_reg.score(X_train, y_train)
test_score_dt_reg = dt_reg.score(X_test, y_test)

# Display training and testing scores
print("Decision Tree Regressor:")
print(f"  Training R^2 score: {train_score_dt_reg:.4f}")
print(f"  Testing R^2 score: {test_score_dt_reg:.4f}")


Decision Tree Regressor:
  Training R^2 score: 0.9545
  Testing R^2 score: -0.1599


# Random Forest Regressor

In [18]:
from sklearn.ensemble import RandomForestRegressor

# Create and train the Random Forest Regressor model.
# random_state is fixed (same seed as the train/test split) so that the
# bootstrap samples, and therefore the reported scores, are repeatable.
rf_reg = RandomForestRegressor(random_state=42)
rf_reg.fit(X_train, y_train)

# Calculate training and testing scores (R^2 on each part)
train_score_rf_reg = rf_reg.score(X_train, y_train)
test_score_rf_reg = rf_reg.score(X_test, y_test)

# Display training and testing scores
print("Random Forest Regressor:")
print(f"  Training R^2 score: {train_score_rf_reg:.4f}")
print(f"  Testing R^2 score: {test_score_rf_reg:.4f}")


Random Forest Regressor:
  Training R^2 score: 0.8458
  Testing R^2 score: 0.2144


# Support Vector Machine (SVR)

In [19]:
from sklearn.svm import SVR

# Fit an SVR with default hyperparameters on the training part.
# NOTE(review): the features mix label codes with a scaled 'Year', and the
# target was min-max scaled earlier; the SVR defaults may be a poor match for
# that — confirm before reading much into the scores.
svr_reg = SVR()
svr_reg.fit(X_train, y_train)

# R^2 on each part
train_score_svr_reg = svr_reg.score(X_train, y_train)
test_score_svr_reg = svr_reg.score(X_test, y_test)

# Build the three report lines and emit them in one go
report_lines = [
    "Support Vector Machine (SVR):",
    f"  Training R^2 score: {train_score_svr_reg:.4f}",
    f"  Testing R^2 score: {test_score_svr_reg:.4f}",
]
print("\n".join(report_lines))


Support Vector Machine (SVR):
  Training R^2 score: -2.6078
  Testing R^2 score: -2.6043


# K-Nearest Neighbors (KNN)

In [20]:
from sklearn.neighbors import KNeighborsRegressor

# Fit a KNeighborsRegressor with default settings (fit() returns the model)
knn_reg = KNeighborsRegressor().fit(X_train, y_train)

# R^2 per part, collected first and then bound to the result names
knn_scores = {
    "train": knn_reg.score(X_train, y_train),
    "test": knn_reg.score(X_test, y_test),
}
train_score_knn_reg = knn_scores["train"]
test_score_knn_reg = knn_scores["test"]

# Report both scores
print("K-Nearest Neighbors (KNN):")
print(
    f"  Training R^2 score: {train_score_knn_reg:.4f}",
    f"  Testing R^2 score: {test_score_knn_reg:.4f}",
    sep="\n",
)


K-Nearest Neighbors (KNN):
  Training R^2 score: 0.4641
  Testing R^2 score: 0.2235


# Hyperparameter Optimization for Support Vector Machine (SVR) using RandomizedSearchCV


In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVR
from scipy.stats import uniform

# Define the parameter distribution for SVR
param_dist_svr = {
    'C': uniform(loc=0.1, scale=10),  # uniform on [loc, loc + scale] = [0.1, 10.1]
    'gamma': ['scale', 'auto'],
    'kernel': ['linear', 'poly', 'rbf']
}

# Perform RandomizedSearchCV.
# A fresh SVR() is passed in directly rather than rebinding `svr_reg`, so the
# model fitted in the SVR cell above is not replaced by an unfitted estimator.
# n_jobs=-1 spreads the 50 candidates x 5 folds over all available cores; the
# sampled candidates are still fixed by random_state.
random_search_svr = RandomizedSearchCV(
    SVR(),
    param_dist_svr,
    n_iter=50,
    cv=5,
    scoring='r2',
    random_state=42,
    n_jobs=-1,
)
random_search_svr.fit(X_train, y_train)

# Print the best parameters and best score
print("Best Parameters for SVR:", random_search_svr.best_params_)
print("Best Score for SVR:", random_search_svr.best_score_)
