In [27]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# File to Load 
avocados = "Resources/avocado.csv"

# Read the Avocado Data
avocado_data = pd.read_csv(avocados).rename(columns={"Unnamed: 0": "Index","4046":"Small Hass","4225":"Large Hass","4770":"XLarge Hass"})
#preview avocado DF      
avocado_data.head()
                                          

Unnamed: 0,Index,Date,AveragePrice,Total Volume,Small Hass,Large Hass,XLarge Hass,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
0,0,2015-12-27,1.33,64236.62,1036.74,54454.85,48.16,8696.87,8603.62,93.25,0.0,conventional,2015,Albany
1,1,2015-12-20,1.35,54876.98,674.28,44638.81,58.33,9505.56,9408.07,97.49,0.0,conventional,2015,Albany
2,2,2015-12-13,0.93,118220.22,794.7,109149.67,130.5,8145.35,8042.21,103.14,0.0,conventional,2015,Albany
3,3,2015-12-06,1.08,78992.15,1132.0,71976.41,72.58,5811.16,5677.4,133.76,0.0,conventional,2015,Albany
4,4,2015-11-29,1.28,51039.6,941.48,43838.39,75.78,6183.95,5986.26,197.69,0.0,conventional,2015,Albany


### Start Finding Math

In [28]:
#total Avocado Volume
total_volume = (avocado_data["Total Volume"]).sum()
# print(f"Total Volume of Avocados Sold = {total_volume}")

#Total Small Avocados Sold
total_small_hass = (avocado_data["Small Hass"]).sum()
# print(f"Total Volume of Small Avocados Sold = {total_small_hass}")

#Total Large Avocados Sold
total_large_hass = (avocado_data["Large Hass"]).sum()
# print(f"Total Volume of Large Avocados Sold = {total_large_hass}")

#Total X-tra Large Avocados Sold
total_xlarge_hass = (avocado_data["XLarge Hass"]).sum()
# print(f"Total Volume of Large Avocados Sold = {total_xlarge_hass}")

#Total Average Price of Avocados
overall_avg_price = (avocado_data["AveragePrice"]).mean()
# print(f"Overall Average Price of Avocados is ${overall_avg_price}")

#Minimum Average Price of Avocados
min_avg_price = (avocado_data["AveragePrice"]).min()
# print(f"Minimum Average Price of Avocados is ${min_avg_price}")

#Maximum Average Price of Avocados
max_avg_price = (avocado_data["AveragePrice"]).max()
# print(f"Maximum Average Price of Avocados is ${max_avg_price}")

# Standard Deviation of Average Price of Avocados
std = (avocado_data["AveragePrice"]).sem()
# print(f"Standard Deviation on Price is {std}")

In [29]:
## Average Price of Avocado's by Region

In [30]:
# Sort avocado data by region
region_data = avocado_data.groupby('region')

# Average price of avocados by region
regional_mean = region_data['AveragePrice'].mean()

# Avocado min by region
regional_min = region_data['AveragePrice'].min()

# Avocado max by region
regional_max = region_data['AveragePrice'].max()

# Total difference in min/max by region
difference = regional_max - regional_min

# Standard deviation of Avocado Prices by Region
regional_std = region_data['AveragePrice'].sem()

In [31]:
avocado_summary = pd.merge(regional_mean,regional_min, on='region',how='outer').rename(columns={"AveragePrice_x": "Average Price","AveragePrice_y":"Minimum Price"})
avocado_summary = pd.merge(avocado_summary,regional_max, on='region',how='outer').rename(columns={"AveragePrice": "Maximum Price"})
avocado_summary = pd.merge(avocado_summary,difference, on='region',how='outer').rename(columns={"AveragePrice": "Min/Max Delta"})
avocado_summary = pd.merge(avocado_summary,regional_std, on='region',how='outer').rename(columns={"AveragePrice": "Standard Deviation"})
avocado_summary = avocado_summary.style.format({'Average Price':'${:,.2f}',
                             'Minimum Price':'${:,.2f}',
                             'Maximum Price':'${:,.2f}',})
avocado_summary

Unnamed: 0_level_0,Average Price,Minimum Price,Maximum Price,Min/Max Delta,Standard Deviation
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Albany,$1.56,$0.85,$2.13,1.28,0.0158819
Atlanta,$1.34,$0.62,$2.75,2.13,0.0216645
BaltimoreWashington,$1.53,$0.95,$2.28,1.33,0.0162867
Boise,$1.35,$0.58,$2.79,2.21,0.0249071
Boston,$1.53,$0.85,$2.19,1.34,0.0178449
BuffaloRochester,$1.52,$1.03,$2.57,1.54,0.0149233
California,$1.40,$0.67,$2.58,1.91,0.0212428
Charlotte,$1.61,$0.80,$2.83,2.03,0.0225454
Chicago,$1.56,$0.70,$2.30,1.6,0.0182455
CincinnatiDayton,$1.21,$0.44,$2.20,1.76,0.0191094


In [32]:
# Grouping Avocado Data by Type of Avocado
type_data = avocado_data.groupby('type')

# Average Price of Avocados by Type
average_price_type = type_data['AveragePrice'].mean()

#Minimum Average Price
min_type_price = type_data['AveragePrice'].min()

#Maximum Average Price
max_type_price = type_data['AveragePrice'].max()

#Price Delta
delta_type_price = max_type_price - min_type_price

#Standard Deviation
std_type_price = type_data['AveragePrice'].sem()

In [33]:
type_df = pd.merge(average_price_type,min_type_price, on='type', how='outer').rename(columns={"AveragePrice_x": "Average Price","AveragePrice_y":"Minimum Price"})
type_df = pd.merge(type_df, max_type_price, on='type', how='outer').rename(columns={"AveragePrice": "Maximum Price"})
type_df = pd.merge(type_df, delta_type_price, on='type', how='outer').rename(columns={"AveragePrice": "Min/Max Delta"})
type_df = pd.merge(type_df, std_type_price, on='type', how='outer').rename(columns={"AveragePrice": "Standard Deviation"})


type_df.reset_index(drop=True)
type_df

Unnamed: 0_level_0,Average Price,Minimum Price,Maximum Price,Min/Max Delta,Standard Deviation
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
conventional,1.15804,0.46,2.22,1.76,0.002753
organic,1.653999,0.44,3.25,2.81,0.003806


In [34]:
# Grouping Avocado Data by Type of Avocado
combo_data = avocado_data.groupby(['type','region'])
combo_data.head([0])

Unnamed: 0,Index,Date,AveragePrice,Total Volume,Small Hass,Large Hass,XLarge Hass,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region


In [35]:
# Average Price of Avocados by Type
combo_average_price_type = combo_data['AveragePrice'].mean()

# Minimum Average Price
combo_min_type_price = combo_data['AveragePrice'].min()

#Maximum Average Price
combo_max_type_price = combo_data['AveragePrice'].max()

#Delta
combo_delta_type_price = combo_max_type_price - combo_min_type_price

# Combo Standard Deviation
combo_std_type_price = combo_data['AveragePrice'].sem()

In [36]:
combo_df = (pd.merge(combo_average_price_type,combo_min_type_price, on=['type','region'], how='outer').rename(columns={"AveragePrice_x": "Average Price","AveragePrice_y":"Minimum Price"}))
combo_df = pd.merge(combo_df, combo_max_type_price, on=['type','region'], how='outer').rename(columns={"AveragePrice": "Maximum Price"})
combo_df = pd.merge(combo_df, combo_delta_type_price, on=['type','region'], how='outer').rename(columns={"AveragePrice": "Min/Max Delta"})
combo_df = pd.merge(combo_df, combo_std_type_price, on=['type','region'], how='outer').rename(columns={"AveragePrice": "Standard Deviation"})

combo_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Average Price,Minimum Price,Maximum Price,Min/Max Delta,Standard Deviation
type,region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
conventional,Albany,1.348757,0.85,1.80,0.95,0.016341
conventional,Atlanta,1.068817,0.68,1.64,0.96,0.013721
conventional,BaltimoreWashington,1.344201,0.95,1.86,0.91,0.016532
conventional,Boise,1.076036,0.58,1.89,1.31,0.018940
conventional,Boston,1.304379,0.85,1.81,0.96,0.016823
conventional,BuffaloRochester,1.382308,1.04,1.71,0.67,0.010964
conventional,California,1.105444,0.67,1.78,1.11,0.017664
conventional,Charlotte,1.275089,0.80,1.77,0.97,0.015175
conventional,Chicago,1.369349,0.70,2.22,1.52,0.024841
conventional,CincinnatiDayton,1.015503,0.60,1.90,1.30,0.017997
