In [None]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Load your CSV dataset
# Replace the path below with the actual file path
dataset = pd.read_csv('/content/drive/MyDrive/yield_df.csv')

# Define input features (X) and output variable (y).
# 'Area' and 'Item' hold text labels; the other four columns are numeric.
categorical_features = ['Area', 'Item']
numeric_features = ['Year', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']
X = dataset[['Area', 'Item', 'Year', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']]
y = dataset['hg/ha_yield']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the numeric columns and one-hot encode the text columns.
# A bare StandardScaler raises ValueError when handed the string columns.
# sparse_threshold=0 forces a dense result because GaussianNB does not accept
# sparse matrices; handle_unknown='ignore' lets unseen labels encode as all zeros.
scaler = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ],
    sparse_threshold=0,
)
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the Gaussian Naive Bayes model
# NOTE(review): GaussianNB is a classifier, so each distinct yield value is
# treated as a separate class label; a regressor is probably the better tool
# for this target -- confirm the intent.
gnb = GaussianNB()
gnb.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred = gnb.predict(X_test_scaled)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Now you can use the trained model to predict new values.
# Each value is wrapped in a list: a dict of bare scalars makes pd.DataFrame
# raise ValueError because no index can be inferred.
new_data = pd.DataFrame({
    'Area': ["India"],
    'Item': ["Sweet potatoes"],
    'Year': [2007],
    'average_rain_fall_mm_per_year': [1083],
    'pesticides_tonnes': [27422],
    'avg_temp': [26]
})

# Transform the new data using the preprocessor fitted on the training set
new_data_scaled = scaler.transform(new_data)

# Make predictions for the new data
new_prediction = gnb.predict(new_data_scaled)

# Print the predicted value
print(f'Predicted hg/ha_yield: {new_prediction[0]}')


ValueError: ignored

In [None]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Load your dataset from the CSV file
file_path = '/content/drive/MyDrive/yield_df.csv'
df = pd.read_csv(file_path)

# Define input features and output variable.
# 'Area' and 'Item' hold text labels; the other four columns are numeric.
categorical_features = ['Area', 'Item']
numeric_features = ['Year', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']
X = df[['Area', 'Item', 'Year', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']]
y = df['hg/ha_yield']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the numeric columns and one-hot encode the text columns.
# A bare StandardScaler raises ValueError when handed the string columns.
# handle_unknown='ignore' lets labels unseen during fit encode as all zeros.
scaler = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ]
)
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train the linear regression model
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test_scaled)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Now, you can use the trained model to predict new values.
# Each value is wrapped in a list: a dict of bare scalars makes pd.DataFrame
# raise ValueError because no index can be inferred.
new_values = pd.DataFrame({
    'Area': ["India"],
    'Item': ["Sweet potatoes"],
    'Year': [2007],
    'average_rain_fall_mm_per_year': [1083],
    'pesticides_tonnes': [27422],
    'avg_temp': [26]
})

# Transform the new input values with the preprocessor fitted on the training set
new_values_scaled = scaler.transform(new_values)

# Make predictions for the new values
prediction = model.predict(new_values_scaled)

print(f'Predicted hg/ha_yield: {prediction[0]}')


ValueError: ignored

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Read the yield dataset into a DataFrame
file_path = '/content/drive/MyDrive/yield_df.csv'
df = pd.read_csv(file_path)

# Column groups: numeric columns are standardized, text columns are one-hot encoded
numeric_features = ['Year', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']
categorical_features = ['Area', 'Item']

# Feature matrix (text columns first, then numeric) and regression target
X = df[categorical_features + numeric_features]
y = df['hg/ha_yield']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Per-group preprocessing steps
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)

# Route each column group to its transformer
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Preprocessing and regression chained into one estimator, then fitted
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
])
model.fit(X_train, y_train)

# Score the held-out rows
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# A single unseen observation, expressed as one record
new_values = pd.DataFrame([{
    'Area': "India",
    'Item': "Soybeans",
    'Year': 2007,
    'average_rain_fall_mm_per_year': 1083,
    'pesticides_tonnes': 27422.77,
    'avg_temp': 25.82,
}])

# The pipeline applies the fitted preprocessing before predicting
prediction = model.predict(new_values)

print(f'Predicted hg/ha_yield: {prediction[0]}')



Mean Squared Error: 1776114057.604053
Predicted hg/ha_yield: 22597.121059748897


In [None]:
import joblib

# Persist the fitted `model` object to disk with joblib
model_filename = 'hari.joblib'
joblib.dump(value=model, filename=model_filename)

# Confirm where the model was written
print(f'Model saved to {model_filename}')


Model saved to hari.joblib


In [None]:
import pickle
# Serialize the fitted `model` object to a binary file with pickle
with open('model_pickle', 'wb') as pickle_file:
    pickle.dump(model, pickle_file)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

# Read the yield dataset into a DataFrame
file_path = '/content/drive/MyDrive/yield_df.csv'
df = pd.read_csv(file_path)

# Column groups: numeric columns are standardized, text columns are one-hot encoded
numeric_features = ['Year', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']
categorical_features = ['Area', 'Item']

# Feature matrix (text columns first, then numeric) and regression target
X = df[categorical_features + numeric_features]
y = df['hg/ha_yield']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Per-group preprocessing steps
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)

# Route each column group to its transformer
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Preprocessing and regression chained into one estimator, then fitted
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
])
model.fit(X_train, y_train)

# Score the held-out rows
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Write the whole fitted pipeline (preprocessing + regressor) to disk
model_filename = 'linear_regression_model.joblib'
joblib.dump(value=model, filename=model_filename)
print(f'Model saved to {model_filename}')


Mean Squared Error: 1776114057.604053
Model saved to linear_regression_model.joblib


In [None]:
import pickle
# Serialize the fitted `model` object to a binary file with pickle
with open('model_pickle1', 'wb') as pickle_file:
    pickle.dump(model, pickle_file)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
import joblib

# Load your dataset from the CSV file
file_path = '/content/drive/MyDrive/yield_df.csv'
df = pd.read_csv(file_path)

# Label encode categorical columns.
# Each column gets its own encoder: refitting one shared LabelEncoder on 'Item'
# replaces the classes learned from 'Area', so a later transform(["India"])
# raises ValueError for an unseen label.
label_encoder_area = LabelEncoder()
label_encoder_item = LabelEncoder()
df['Area'] = label_encoder_area.fit_transform(df['Area'])
df['Item'] = label_encoder_item.fit_transform(df['Item'])

# Define input features and output variable
X = df[['Area', 'Item', 'Year', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']]
y = df['hg/ha_yield']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the input features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train the linear regression model
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test_scaled)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Save the trained model to a file.
# Only the bare regressor is written; predicting from the saved file also
# requires the fitted scaler and both label encoders used below.
model_filename = 'linear_regression_model.joblib'
joblib.dump(model, model_filename)
print(f'Model saved to {model_filename}')

# Now, you can use the trained model to predict new values.
# Text labels are mapped to codes with the encoder fitted on the matching column.
new_values = pd.DataFrame({
    'Area': [label_encoder_area.transform(["India"])[0]],
    'Item': [label_encoder_item.transform(["Sweet potatoes"])[0]],
    'Year': [2007],
    'average_rain_fall_mm_per_year': [1083],
    'pesticides_tonnes': [27422.77],
    'avg_temp': [27.31]
})

# Standardize the new input values
new_values_scaled = scaler.transform(new_values)

# Make predictions for the new values
prediction = model.predict(new_values_scaled)

print(f'Predicted hg/ha_yield: {prediction[0]}')


Mean Squared Error: 6642543471.014076
Model saved to linear_regression_model.joblib


ValueError: ignored

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
import joblib

# Read the yield dataset into a DataFrame
file_path = '/content/drive/MyDrive/yield_df.csv'
df = pd.read_csv(file_path)

# One LabelEncoder per text column, fitted on (and written back into) df
label_encoder_area = LabelEncoder()
label_encoder_item = LabelEncoder()
column_encoders = (('Area', label_encoder_area), ('Item', label_encoder_item))
for column, encoder in column_encoders:
    df[column] = encoder.fit_transform(df[column])

# Feature matrix and regression target
feature_columns = [
    'Area',
    'Item',
    'Year',
    'average_rain_fall_mm_per_year',
    'pesticides_tonnes',
    'avg_temp',
]
X = df[feature_columns]
y = df['hg/ha_yield']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both splits
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the regressor on the scaled training data
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Score the held-out rows
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Bundle the regressor with the scaler and both encoders in one file
model_filename = 'singh.joblib'
artifacts = (model, scaler, label_encoder_area, label_encoder_item)
joblib.dump(artifacts, model_filename)
print(f'Model saved to {model_filename}')

# A single unseen observation with raw text labels
new_values = pd.DataFrame({
    'Area': ['India'],
    'Item': ['Soybeans'],
    'Year': [2007],
    'average_rain_fall_mm_per_year': [1083],
    'pesticides_tonnes': [27422.77],
    'avg_temp': [27.31],
})

# Apply the same encoding and scaling that the training data went through
for column, encoder in column_encoders:
    new_values[column] = encoder.transform(new_values[column])
new_values_scaled = scaler.transform(new_values)

# Predict the yield for the new observation
prediction = model.predict(new_values_scaled)

print(f'Predicted hg/ha_yield: {prediction[0]}')


Mean Squared Error: 6642543471.014076
Model saved to singh.joblib
Predicted hg/ha_yield: 56026.59629557935


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
import joblib

# Read the yield dataset into a DataFrame
file_path = '/content/drive/MyDrive/yield_df.csv'
df = pd.read_csv(file_path)

# One LabelEncoder per text column, fitted on (and written back into) df
label_encoder_area = LabelEncoder()
label_encoder_item = LabelEncoder()
column_encoders = (('Area', label_encoder_area), ('Item', label_encoder_item))
for column, encoder in column_encoders:
    df[column] = encoder.fit_transform(df[column])

# Feature matrix (deliberately without 'Year') and regression target
feature_columns = [
    'Area',
    'Item',
    'average_rain_fall_mm_per_year',
    'pesticides_tonnes',
    'avg_temp',
]
X = df[feature_columns]
y = df['hg/ha_yield']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both splits
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the regressor on the scaled training data
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Score the held-out rows
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Bundle the regressor with the scaler and both encoders in one file
model_filename = 'thakur.joblib'
artifacts = (model, scaler, label_encoder_area, label_encoder_item)
joblib.dump(artifacts, model_filename)
print(f'Model saved to {model_filename}')

# A single unseen observation with raw text labels
new_values = pd.DataFrame({
    'Area': ['Albania'],
    'Item': ['Maize'],
    'average_rain_fall_mm_per_year': [1485],
    'pesticides_tonnes': [121],
    'avg_temp': [16.37],
})

# Apply the same encoding and scaling that the training data went through
for column, encoder in column_encoders:
    new_values[column] = encoder.transform(new_values[column])
new_values_scaled = scaler.transform(new_values)

# Predict the yield for the new observation
prediction = model.predict(new_values_scaled)

print(f'Predicted hg/ha_yield: {prediction[0]}')


Mean Squared Error: 6687864263.723917
Model saved to thakur.joblib
Predicted hg/ha_yield: 102856.32474188472


In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline

# Load the dataset
file_path = '/content/drive/MyDrive/yield_df.csv'
df = pd.read_csv(file_path)

# Separate features (X) and target variable (y)
X = df[['Area', 'Item', 'Year', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']]
y = df['hg/ha_yield']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 'Area' and 'Item' hold text labels, so they cannot go through StandardScaler
# (it raises ValueError on strings). Scale only the numeric columns and one-hot
# encode the text columns; handle_unknown='ignore' lets labels unseen during
# fit encode as all zeros.
numerical_features = ['Year', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']
categorical_features = ['Area', 'Item']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Create a pipeline with preprocessing and RandomForestRegressor
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Fit the model on the training data
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Save the model (optional)
# You can save the trained model using joblib or pickle for later use
# For example, using joblib:
# from joblib import dump
# dump(model, 'yield_model.joblib')


ValueError: ignored

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Read the yield dataset into a DataFrame
file_path = '/content/drive/MyDrive/yield_df.csv'
df = pd.read_csv(file_path)

# Column groups: numeric columns are standardized, text columns are one-hot encoded
numerical_features = ['Year', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']
categorical_features = ['Area', 'Item']

# Feature matrix (text columns first, then numeric) and regression target
X = df[categorical_features + numerical_features]
y = df['hg/ha_yield']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Per-group preprocessing steps
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)

# Route each column group to its transformer
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

# Preprocessing followed by a 100-tree random forest with a fixed seed
forest = RandomForestRegressor(n_estimators=100, random_state=42)
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', forest),
])

# Train on the training split
model.fit(X_train, y_train)

# Score the held-out rows
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')


Mean Squared Error: 89738281.71844366


In [None]:
import joblib

# Persist the fitted `model` object to disk with joblib
model_filename = 'sree.joblib'
joblib.dump(value=model, filename=model_filename)

# Confirm where the model was written
print(f'Model saved to {model_filename}')

Model saved to sree.joblib
