# Biofuels Production

---

**Purpose:**

Exploring, visualizing, mapping, and forecasting biofuels production by country using pandas, matplotlib, seaborn, folium, and scikit-learn

**Data Source:**

https://catalog.data.gov/dataset/biofuels-consumption-and-production-by-country-2000-2010-f729f

## Featured Libraries

In [None]:
import sys
sys.version

In [None]:
from datetime import datetime
# Capture the moment this notebook was run and report it in a readable form
current = datetime.now()
stamp = current.strftime('%A, %m/%d/%Y at %I:%M %p')

print("The date & time is", stamp)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# pandas display options: thousands separators with two decimals for floats,
# plus the row, column, column-width and precision settings used in this notebook
pd.set_option('display.float_format', '{:,.2f}'.format)
pd.set_option('display.max_rows', 250)
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_colwidth', 0)
pd.set_option('display.precision', 2)

import seaborn as sns

# Apply matplotlib's 'bmh' style sheet to the charts drawn below
plt.style.use('bmh')

# Print the pandas and NumPy versions in use
print(pd.__version__)
print(np.__version__)

## Data Preparation

In [None]:
# Direct download link for the CSV loaded below
URL ='https://openei.org/doe-opendata/dataset/2a8611e3-69af-4549-bc5f-e0ac5986673a/resource/7c0e6c1a-4e9c-46a7-b841-a57ad820f3ea/download/totalbiofuelsproduction20002010thousandbarrelsperday.csv'

In [None]:
print(URL)

In [None]:
# First pass: load the CSV with pandas' default parsing and display it
df = pd.read_csv(URL)
df

In [None]:
df.info()

In [None]:
# Second pass: reload, additionally treating '-', '--' and 'NA' as missing values
df = pd.read_csv(URL, na_values=['-','--','NA'])
df.info()

In [None]:
df.head()

In [None]:
df.columns

In [None]:
df = df.rename(columns = {'Unnamed: 0':'Location'})
df.info()

In [None]:
df.columns[1:].to_list()

In [None]:
df = df.melt(
    id_vars=['Location'],
    value_vars=df.columns[1:].to_list(),
    var_name='Period',
    value_name='Production'
)

df.info()

In [None]:
df.head(10)

In [None]:
df.isna().sum()

In [None]:
df.Production.fillna(0, inplace=True)

In [None]:
df.isna().sum()

In [None]:
df['Year End'] = df['Period'].apply(lambda x: datetime(int(x),1,1))

df.info()

In [None]:
df.head()

In [None]:
countries = ["United States", "Brazil", "Germany", "India", "Malawi", "Australia"]

countries

In [None]:
df = df[df['Location'].isin(countries)]
df

In [None]:
df = df.pivot_table(index='Year End', columns='Location', values='Production', aggfunc=sum, fill_value=0)
df

In [None]:
df.T

## Conditional Formatting

In [None]:
df.style.highlight_max().format('{:,.4f}')

In [None]:
df.style.highlight_max(axis=1).format("{:,.4f}")

In [None]:
df.style.highlight_max(color='lime').highlight_min(color='pink').format("{:,.4f}")

In [None]:
df.style.background_gradient('Blues').format("{:,.4f}")

In [None]:
df.style.background_gradient('coolwarm').format("{:,.4f}")

In [None]:
def check(val):
    """Return an orange-background CSS rule for values below 20, else an empty string."""
    return "background-color: orange" if val < 20 else ""

# Colour every cell according to check(). Styler.applymap was renamed to
# Styler.map in pandas 2.1 (applymap is deprecated there), so use whichever
# element-wise method the installed pandas provides.
styler = df.style
apply_elementwise = styler.map if hasattr(styler, 'map') else styler.applymap
apply_elementwise(check).format('{:,.4f}')

## Line Charts

In [None]:
df.plot();

In [None]:
df.plot(marker='o');

In [None]:
df.plot(
    subplots=True,
    lw=2,
    marker='s',
    alpha=0.7,
    layout=(2,3),
    figsize=(12,6),
    grid=True,
    rot=45,
    sharex=False
);

In [None]:
for i, cols in df.iteritems():
    if cols.sum() > 3000:
        cols.plot(label=i, legend=True, marker='^', markerfacecolor='m')     

In [None]:
df.sum(axis=1).plot(lw=3, marker='*');

## Area Charts

In [None]:
df.plot.area();

In [None]:
df.plot.area(stacked=False);

## Bar Charts

In [None]:
picks=['Brazil','United States']

In [None]:
df.assign(PERIOD = df.index.year)\
.plot.bar(x='PERIOD', y=picks);

In [None]:
df.assign(PERIOD = df.index.year)\
.plot.barh(x='PERIOD', y=picks, stacked=True);

In [None]:
df.sum().sort_values().plot.bar();

## Pie Charts

In [None]:
pie_data = pd.Series(data=df.sum(), name='Biofuels Production')
pie_data

In [None]:
pie_data.plot.pie(
    cmap='Spectral',
    figsize=(6,6),
    autopct='%.01f%%'
);

In [None]:
pie_data[pie_data.values >200]

In [None]:
pie_data = pie_data[pie_data.values >200]

pie_data.plot.pie(
    cmap='Spectral',
    figsize=(6,6),
    autopct='%.01f%%'
);

In [None]:
pie_data.plot.pie(
    cmap='Spectral',
    autopct="%.1f%%",
    figsize=(6,6),
    pctdistance=0.9,
    wedgeprops=dict(width=0.3)
)

plt.show();

## Statistical Charts

In [None]:
df.plot.box();

In [None]:
df.plot.hist(subplots=True,figsize=(12, 6), layout=(2,3), sharex=False);

## Geospatial

In [None]:
# Back to long format: one row per year and country.
# var_name is passed as a plain string because melt expects a scalar here;
# recent pandas versions raise ValueError for a list when the columns are not
# a MultiIndex.
df = df.reset_index().melt(id_vars=['Year End'], var_name='Location', value_name='Production')
df

In [None]:
!pip install geocoder

In [None]:
import geocoder

In [None]:
# Map each distinct country name to the `latlng` returned by geocoder's osm lookup
gdd = {
    country: geocoder.osm(country).latlng
    for country in df['Location'].unique()
}

gdd

In [None]:
!pip install folium --upgrade

In [None]:
import folium

In [None]:
gf = df.groupby('Location').sum().reset_index()
gf

In [None]:
gf['LatLng'] = gf['Location'].map(gdd)
gf

In [None]:
# Base map centred on (0, 0)
folium_map = folium.Map(location=(0, 0), zoom_start=2, tiles="CartoDB Positron")

# One circle per country: the radius scales with total production and the
# outline is red for totals above 1000, blue otherwise
for _, country_row in gf.iterrows():
    name = country_row['Location']
    total = country_row['Production']
    outline = 'red' if total > 1000 else 'blue'
    circle = folium.Circle(
        location=country_row['LatLng'],
        popup="{}={:,.0f}".format(name, total),
        radius=total * 250,
        color=outline,
        fill=True,
        fill_color='lightgrey',
        fill_opacity=.7,
        tooltip=name,
    )
    circle.add_to(folium_map)

# Display the map
folium_map

## Trends & Forecasts

In [None]:
xf = df.groupby(pd.Grouper(key='Year End', freq='y')).sum()
xf

In [None]:
xf.plot();

In [None]:
X = xf.index.year.values
X

In [None]:
Y = xf.values.ravel()
Y

In [None]:
np.corrcoef(X,Y)

In [None]:
# Scatter plot with a fitted regression line. x and y are passed by keyword:
# seaborn 0.12+ no longer accepts the data arguments positionally.
sns.regplot(x=X, y=Y);

In [None]:
# numpy polyfit

# Least-squares fit of a degree-1 polynomial: slope m and intercept b
coefficients = np.polyfit(X, Y, 1)
m, b = coefficients

print("m={:.02f},  b={:.02f}".format(m, b))

In [None]:
# Straight line y = m*x + b built from the fitted slope and intercept
def f(x):
    """Evaluate the fitted line at x using the module-level m and b."""
    return b + m * x

# Scatter of the observed points with the fitted line f(X) drawn on top.
# x and y are passed by keyword: seaborn 0.12+ rejects positional data arguments.
sns.scatterplot(x=X, y=Y, alpha=.4);
sns.lineplot(x=X, y=f(X), color='grey');

In [None]:
fit = np.polyfit(x=X, y=Y, deg=1)

fit

In [None]:
np.polyval(fit, np.arange(2011,2015))

In [None]:
from sklearn.linear_model import LinearRegression

linreg = LinearRegression()

In [None]:
linreg.fit(X.ravel().reshape(-1,1), Y.reshape(-1,1))

In [None]:
print(linreg.intercept_)

print(linreg.coef_)

In [None]:
X.ravel().reshape(-1,1)

In [None]:
x_pred = np.arange(2011, 2015).reshape(-1,1)
x_pred

In [None]:
y_pred = linreg.predict(x_pred)
y_pred

In [None]:
X_new = np.concatenate((X, x_pred.ravel()))
X_new

In [None]:
Y_new = np.concatenate((Y, y_pred.ravel()))
Y_new

In [None]:
scenario = np.where(X_new <= X.max(), 'actual', 'forecast')
scenario

In [None]:
sns.lineplot(x=X_new, y=Y_new, hue=scenario)
plt.scatter(x=X_new, y=Y_new, marker='o', s=50, c='gray', alpha=.7);
plt.xticks(rotation=45);