In [None]:
import pandas as pd
import numpy as np
from datetime import datetime

## Biofuels

In [None]:
# Production figures per location for the years 2000-2010 (one value per year).
production_by_location = {
    'United States': [105.54, 115.71, 140.29, 183.87, 223.29, 260.61, 334.95, 457.33, 649.68, 746.52, 887.62],
    'Brazil': [183.89, 197.59, 216.93, 249.35, 251.72, 276.42, 307.31, 395.68, 486.35, 477.53, 527.32],
    'Germany': [4.3, 5.4, 8.8, 14, 20.4, 35.8, 59.4, 63.8, 65, 58, 62],
    'India': [2.9, 3.0, 3.2, 3.3, 3.5, 3.9, 4.5, 4.7, 5.2, 7, 7],
    'Malawi': [.2] * 11,  # constant across all eleven years
    'Australia': [0, 0, 0, 0, .10, .6, 1.7, 2.1, 3.4, 5.2, 7.9],
}

# 'Period' goes first so it stays the leading column of the wide table.
df = pd.DataFrame({'Period': range(2000, 2011), **production_by_location})

# Quick structural check: column names, dtypes and non-null counts.
df.info()

In [None]:
# Preview the first five rows of the wide table.
df.head(n=5)

In [None]:
# Reshape wide -> long: every non-'Period' column becomes a 'Location' label
# and its cell value moves into the 'Production' column.
pivot = pd.melt(
    df,
    id_vars=['Period'],
    var_name='Location',
    value_name='Production',
)
pivot

In [None]:
# Copy the long table to the system clipboard as tab-separated text so it can
# be pasted straight into a spreadsheet. `index` is a boolean option, so pass
# False explicitly instead of relying on None being falsy.
# NOTE: this needs a clipboard backend and will fail on a headless machine.
pivot.to_clipboard(excel=True, sep="\t", index=False)

## Random Data

In [None]:
# Build a synthetic sales-transaction table with N rows.
start_date, end_date = '2010-01-01', '2021-01-01'

# Category labels and the probability of drawing each one (same order).
location_list = ['DE', 'CT', 'NY', 'NJ']
location_prob = [.18,.23,.25,.34]

employee_list = ['Pat', 'Leslie', "Jes", 'Sam']
employee_prob = [.1,.2,.3,.4]

current_date = datetime.now()

N = 12000

# Keep the seeded generator and draw every column from it. A bare
# `np.random.RandomState(seed=7)` creates a generator and throws it away
# without seeding the global `np.random.*` functions, so the data would be
# different on every run.
rng = np.random.RandomState(seed=7)

df = pd.DataFrame(
    {
        # Uniformly sampled calendar days in [start_date, end_date).
        'tran_date': rng.choice(np.arange(start_date, end_date, dtype='datetime64[D]'), size=N),
        'locations': rng.choice(location_list, size=(N,), p=location_prob),
        'employees': rng.choice(employee_list, size=(N,), p=employee_prob),
        # Whole numbers from 1 to 24 inclusive.
        'sales_hrs': rng.choice(np.arange(1, 25, 1, dtype='int64'), size=N),
        'sales_tot': rng.normal(loc=1000, scale=20, size=N).round(2),
    }
)

# Stamp every row with the time the table was generated.
df['docs_date'] = current_date.strftime("%Y-%m-%d %H:%M:%S")

# A row is a "forecast" when its transaction date lies after today.
df['forecasts'] = np.where(df['tran_date'] <= current_date.strftime("%Y-%m-%d"), False, True)

# Scale every 2015 sales total down to 65% of its drawn value.
df.loc[df['tran_date'].dt.year == 2015, ['sales_tot']] = df['sales_tot'] * .65

# Zero out Leslie's hours and totals between 2012-02-17 and 2014-07-31.
em = df['employees'] == 'Leslie'
dt = df['tran_date'].between("2012-02-17", "2014-07-31")

df.loc[em & dt, ['sales_hrs', 'sales_tot']] = 0

# Drop any row containing a missing value (reassign rather than mutate in place).
df = df.dropna(axis=0)

df.info()

In [None]:
# Preview the first five generated transactions.
df.head(n=5)

In [None]:
# Yearly totals, one subplot per summed column.
# numeric_only=True keeps the text columns (locations, employees, docs_date)
# out of the sum; without it the result depends on the pandas version (they
# are either dropped or string-concatenated).
yearly_totals = df.groupby(pd.Grouper(key='tran_date', freq='Y')).sum(numeric_only=True)
yearly_totals.plot(subplots=True, marker='o', markerfacecolor='gray', lw=3, ls='--', figsize=(8,10));

## Map Data

In [None]:
# Remote comma-separated text file hosted on census.gov
# (judging by its name, 2010 mean centers of population by state — confirm).
URL = 'https://www2.census.gov/geo/docs/reference/cenpop2010/CenPop2010_Mean_ST.txt'

# Download and parse the file in one step; requires network access.
df = pd.read_csv(filepath_or_buffer=URL)

# Quick structural check: column names, dtypes and non-null counts.
df.info()

In [None]:
# Preview the first five rows of the downloaded table.
df.head(n=5)