In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Loops & Dictionaries (10 minutes)
Create a nested dictionary of recipes, with keys at the top level and dictionaries as their associated values.

1. Create an empty dictionary named `recipes`.
1. Add three dishes as the **keys** of your dictionary (e.g. 'meat pie')
1. For each key, the **value** should be another dictionary consisting of `ingredient: amount` pairs (e.g. `'sugar': 'two cups'`). Each dish should have three ingredients.
1. Write a function to process the recipe dictionary. This function should loop through the recipes and return a single list of all ingredients for all recipes.

In [None]:
# 1
recipes = {}

# 2, 3 - add the three dishes, each mapped to its own ingredient -> amount dictionary
recipes.update({
    'dish1': {
        'ingredient1a': 'amount1a',
        'ingredient1b': 'amount1b',
        'ingredient1c': 'amount1c',
    },
    'dish2': {
        'ingredient2a': 'amount2a',
        'ingredient2b': 'amount2b',
        'ingredient2c': 'amount2c',
    },
    'dish3': {
        'ingredient3a': 'amount3a',
        'ingredient3b': 'amount3b',
        'ingredient3c': 'amount3c',
    },
})

# 4
def process_recipes(recipes):
    """Flatten a nested recipe dictionary into one list of ingredient strings.

    Args:
        recipes: dict mapping each dish name to a dict of
            ``ingredient: amount`` pairs.

    Returns:
        A list with one ``"<amount> of <ingredient>"`` string per ingredient,
        in the order the dishes and their ingredients are stored.
    """
    combined = []
    for dish in recipes.values():
        # The exact wording of each entry is a free choice; any readable format works.
        combined.extend(f"{amount} of {name}" for name, amount in dish.items())
    return combined

# Last expression in the cell, so the combined ingredient list is shown as the cell output.
process_recipes(recipes)

In [None]:
# 4 Bonus - one liner (neat but not recommended!)
def process_recipes_oneline(recipes):
    """Single-comprehension variant: one "<amount> of <ingredient>" string per ingredient of every dish."""
    return [f"{amount} of {name}" for dish in recipes.values() for name, amount in dish.items()]
# Display the one-liner's result as the cell output.
process_recipes_oneline(recipes)

## Coding Readability (5 Minutes)
Rewrite the code below using best practices:
```python
def blabla():
for i in range ( 10 ):for j in range( 10 ):for k in range(10): print( i,j,k ) return everything
```

In [None]:
def digits_one_to_999():
    """Print and collect the (hundreds, tens, ones) digits of every three-digit combination.

    Each triple is printed on its own line as it is visited, in ascending
    order. Despite the function name, the sequence starts at (0, 0, 0), so
    1,000 triples are produced, ending with (9, 9, 9).

    Returns:
        list of tuple: every ``(hundreds, tens, ones)`` triple, in the order printed.
    """
    digit_triples = [
        (hundreds, tens, ones)
        for hundreds in range(10)
        for tens in range(10)
        for ones in range(10)
    ]
    for triple in digit_triples:
        print(*triple)
    return digit_triples

In [None]:
# Keep the returned list of digit triples in `x`; the call itself also prints every triple.
x = digits_one_to_999()

## Data cleaning (15 Minutes)
Read the data from http://rattle.togaware.com/weatherAUS.csv
1. Add a `day`, `month`, and `year` column (three new columns)
1. Print out the name of the columns where there are missing values
1. Drop all columns except `day`, `month`, `year`, `Rainfall`, and `Sunshine`
1. Fill all missing `Rainfall` values as follows:
  1. If `Sunshine` is over 7.0, set `Rainfall` = 0
  2. Else (including if `Sunshine` is `NaN`) set `Rainfall` to the mean `Rainfall` value.

In [None]:
# Load the weather dataset straight from the URL (this cell needs network access every time it runs).
df = pd.read_csv('http://rattle.togaware.com/weatherAUS.csv')

In [None]:
#1
# Parse the Date column a single time; converting it separately for each
# new column would repeat the same work over the whole column three times.
dates = pd.to_datetime(df['Date'])
df['day'] = dates.dt.day
df['month'] = dates.dt.month
df['year'] = dates.dt.year

In [None]:
#2
# Boolean flag per column: True when the column has at least one missing value.
has_missing = df.isna().any()
# Print the flagged column names, one per line.
print(*df.columns[has_missing], sep='\n')

In [None]:
# 3
keep_columns = ['day', 'month', 'year', 'Rainfall', 'Sunshine']

# Option A: select the wanted columns by name.
df2 = df.loc[:, keep_columns]
# Option B: drop every column that is not wanted. This assignment runs last,
# so it is the frame the following cells work with.
unwanted_columns = [name for name in df.columns if name not in keep_columns]
df2 = df.drop(columns=unwanted_columns)

In [None]:
# 4 for missing Rainfall values
#   1. If `Sunshine` is over 7.0, set `Rainfall` = 0
#   2. Else (including if `Sunshine` is NaN) set `Rainfall` to the mean `Rainfall` value.
#
# Three equivalent approaches are shown. The first one fills every gap, so
# the later ones find nothing left to fill when the cell runs top to bottom.

# Compute the mean once, before any filling: the zeros written below would
# otherwise pull the mean down.
mean = df2['Rainfall'].mean()

# Approach 1: two boolean-mask assignments.
missing_rain = df2['Rainfall'].isna()
# NaN > 7.0 evaluates to False, so rows with unknown Sunshine land in the "else" case.
sunny = df2['Sunshine'] > 7.0
df2.loc[missing_rain & sunny, 'Rainfall'] = 0
df2.loc[missing_rain & ~sunny, 'Rainfall'] = mean

# or
# Approach 2: compute replacement values only for the rows that need them.
# The rows are selected with a boolean mask through .loc; passing index
# labels to .iloc is only correct while the index is the default 0..n-1 range.
missing_rain = df2['Rainfall'].isna()
df2.loc[missing_rain, 'Rainfall'] = (
    df2.loc[missing_rain, 'Sunshine'].apply(lambda x: 0 if x > 7.0 else mean)
)

# or
# Approach 3: fillna with a per-row Series of replacement values. The result
# is assigned back to the column: fillna(..., inplace=True) on the
# df2['Rainfall'] selection acts on an intermediate object, which pandas 2.x
# warns about and which leaves df2 unchanged under Copy-on-Write.
fill_values = df2['Sunshine'].apply(lambda x: 0 if x > 7 else mean)
df2['Rainfall'] = df2['Rainfall'].fillna(fill_values)


In [None]:
# Count the Rainfall values that are still missing; this should be 0 once the fill cell has run.
df2['Rainfall'].isna().sum()

## Visualization (15 Minutes)

As best you can, use Python to recreate this bar chart, which was created in Google Sheets. You may use Matplotlib, Seaborn, or any other graphing library you have come across.
<img src="chart.jpg" alt="chart to recreate" width="500"/>

In [None]:
category = ['A', 'B', 'C', 'D', 'E']
count = [54, 23, 99, 37, 3]

# Work through the explicit figure/axes objects rather than mixing them with
# the pyplot state machine, so every call clearly targets the same axes.
fig, ax = plt.subplots(figsize=(8, 6))

# Vertical grid lines only, drawn underneath the bars.
ax.set_axisbelow(True)
ax.xaxis.grid(True)

ax.barh(y=category, width=count, color='red')
ax.set_title('Count By Category')
ax.set_xlabel('Count')
ax.set_ylabel('Category')
ax.set_xticks([0, 25, 50, 75, 100])

plt.show()

## Github (5 Minutes)

Follow the instructions at https://github.com/enkeboll/fis-dc-ds-060319-gifs, and make sure to check your email to confirm your collaborator status!

# Assessment submission (2 Minutes)
Please save this file as `firstname.ipynb` and upload it using [this form](https://docs.google.com/forms/d/e/1FAIpQLSdlnMgc5Nb5uFTmw7DiScapevg-VA8DQdiFeIAil5sZqYZbFA/viewform)