# Honey Production Analysis with Linear Regression

In [None]:
import codecademylib3_seaborn
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model

## Step 1: Load and Preview the Data
We load the dataset containing honey production data in the US.

In [None]:
# Location of the honey-production CSV on the Codecademy content server.
DATA_URL = "https://content.codecademy.com/programs/data-science-path/linear_regression/honeyproduction.csv"

# Read the whole file into a DataFrame, then show the first five rows as a
# quick check of which columns are available.
df = pd.read_csv(DATA_URL)
preview = df.head(n=5)
print(preview)

## Step 2: Average Total Production per Year
We group the data by year and calculate the average total honey production.

In [None]:
# One group per distinct value of the 'year' column.
by_year = df.groupby('year')

# Mean of the 'totalprod' column within each year; the result is a Series
# indexed by year.
prod_per_year = by_year['totalprod'].mean()
print(prod_per_year)

## Step 3: Prepare Feature and Target Variables

In [None]:
# The Series index holds the years; the regression below is fitted on a
# 2-D feature array, so turn the flat array into a single column
# (n_years rows x 1 column).
years = prod_per_year.index.values
X = years.reshape((-1, 1))

# Target: the per-year mean production, as a flat array aligned with X.
y = prod_per_year.values

# Print the features and then the targets, each on its own line(s).
print(X, y, sep="\n")

## Step 4: Visualize Data
We plot a scatterplot to visualize honey production over the years.

In [None]:
# Scatter of per-year average production (y) against year (X).
plt.scatter(X, y)
plt.xlabel("Year")
plt.ylabel("Average Total Production")
plt.title("Honey Production Over Time")
# Render this figure now. Without an explicit show(), a script run keeps
# drawing the later steps' plots onto this same figure.
plt.show()

## Step 5: Fit Linear Regression Model

In [None]:
# Ordinary least-squares line through the (year, average production) points.
# fit() returns the fitted estimator, so construction and fitting are chained.
regr = linear_model.LinearRegression().fit(X, y)

# There is a single feature (the year), so coef_ has exactly one entry.
slope = regr.coef_[0]
intercept = regr.intercept_
print("Slope:", slope)
print("Intercept:", intercept)

## Step 6: Predict Production on Known Years

In [None]:
# Evaluate the fitted line at the same years the model was trained on.
y_predict = regr.predict(X)

# Overlay the fitted line (red) on the observed points to judge the fit.
plt.scatter(X, y)
plt.plot(X, y_predict, color='red')
plt.xlabel("Year")
plt.ylabel("Average Total Production")
plt.title("Regression Line Over Scatterplot")
# Render and finish this figure here so that the forecast plot in the next
# step starts on a clean figure instead of being drawn over this one.
plt.show()

## Step 7: Predict Future Honey Production
We use our model to forecast production from 2013 to 2049.

In [None]:
# Forecast horizon: every year from 2013 through 2049 inclusive, shaped as a
# single feature column to match what the model was fitted on.
first_year, last_year = 2013, 2049
X_future = np.arange(first_year, last_year + 1).reshape(-1, 1)

# Extrapolate the fitted line over the future years.
future_predict = regr.predict(X_future)

# Historical averages and the extrapolated trend on one set of axes.
plt.plot(X, y, label='Historical Data')
plt.plot(X_future, future_predict, color='green', label='Future Prediction')
plt.title("Forecasting Honey Production")
plt.xlabel("Year")
plt.ylabel("Average Total Production")
plt.legend()
plt.show()