# Capstone Project: Predicting Maize Production in Rwanda
**Course:** INSY 8413 | Introduction to Big Data Analytics  
**Student:** Niyonshimira Jeanmarie  
**Sector:** Agriculture  
**Dataset:** FAOSTAT – Crop Production – Rwanda  
**Tool:** Python

---


## Objectives
- Clean and preprocess agricultural data
- Analyze trends in maize production
- Predict maize production using a regression model
- Visualize findings and extract insights
---


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Read the FAOSTAT crop-production extract into a DataFrame and preview
# the first rows to confirm the file parsed as expected.
DATA_FILE = "fao_rwanda_crop_production.csv"
df = pd.read_csv(DATA_FILE)
df.head()


In [None]:
# Check for missing values
print("Missing values per column:")
print(df.isnull().sum())

# Ensure correct data types.
# A direct astype(int) raises on NaN or non-numeric text, so coerce first
# (unparseable entries become NaN) and drop the rows that cannot be used.
for column in ('Year', 'Production'):
    df[column] = pd.to_numeric(df[column], errors='coerce')

rows_before = len(df)
# .copy() gives an independent frame so the assignments below do not
# trigger chained-assignment warnings.
df = df.dropna(subset=['Year', 'Production']).copy()
rows_dropped = rows_before - len(df)
if rows_dropped:
    print(f"Dropped {rows_dropped} row(s) with missing or non-numeric Year/Production")

df['Year'] = df['Year'].astype(int)
# NOTE: casting to int truncates any fractional part of Production.
df['Production'] = df['Production'].astype(int)

# Summary statistics
df.describe()


In [None]:
# Line chart of Production against Year, drawn through the explicit
# Figure/Axes interface; each observation is marked with a dot.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=df, x='Year', y='Production', marker='o', ax=ax)
ax.set_title("Maize Production in Rwanda Over Years")
ax.set_xlabel("Year")
ax.set_ylabel("Production (tonnes)")
ax.grid(True)
plt.show()


In [None]:
# Regression: predict Production from Area_Harvested and Yield.
# NOTE(review): production is commonly area x yield, so these two features
# may determine the target almost exactly -- confirm that a high R² here
# reflects more than that identity.
X = df[['Area_Harvested', 'Yield']]
y = df['Production']

# Hold out 25% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate model.
# RMSE is taken as the square root of the MSE instead of passing
# squared=False: that keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, while this form works on every version.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
r2 = r2_score(y_test, y_pred)

print(f"RMSE: {rmse:.2f}")
print(f"R² Score: {r2:.2f}")


In [None]:
# Innovation: a simple rule-based estimator to compare against the model.
# Each row's estimate is that row's harvested area multiplied by that row's
# yield (a per-row product, not an average).
# NOTE(review): the result is in tonnes only if Yield is tonnes per unit of
# Area_Harvested -- confirm the dataset's units before comparing columns.
df['Estimated_Production'] = df['Area_Harvested'].mul(df['Yield'])

# Show reported and estimated production side by side.
comparison_columns = ['Year', 'Production', 'Estimated_Production']
df.loc[:, comparison_columns]
