# Most important pandas takeaways

Easy access codes

### Import the necessary libraries

In [2]:
import pandas as pd

### Import the data

In [40]:
# dataset based on European soccer teams
url_euro_teams = 'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/02_Filtering_%26_Sorting/Euro12/Euro_2012_stats_TEAM.csv'
euro_teams = pd.read_csv(url_euro_teams, sep = ',')

# dataset based on Chipotle orders
url_chipo = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'
chipo = pd.read_csv(url_chipo, sep = '\t')


### Get a feel for what dataset looks like.

In [26]:
# assign which dataset we're looking at
test_data = euro_teams

# See first 10 entries
test_data.head()

# Row count
rows = test_data.shape[0]
print("Number of rows: ", str(rows))

# Column count
columns = test_data.shape[1]
print("Number of columns: ", str(columns))

# Column names
print("Column names: ", str(test_data.columns))

# Indexing method
print("Index method: ", str(chipo.index))

# Data types of all columns
print("Data types for entire dataframe: ")
test_data.info()

# Data type of particular column
print("Data type of Goals column specifically is: ", str(euro_teams.Goals.dtype))


Number of rows:  16
Number of columns:  35
Column names:  Index(['Team', 'Goals', 'Shots on target', 'Shots off target',
       'Shooting Accuracy', '% Goals-to-shots', 'Total shots (inc. Blocked)',
       'Hit Woodwork', 'Penalty goals', 'Penalties not scored', 'Headed goals',
       'Passes', 'Passes completed', 'Passing Accuracy', 'Touches', 'Crosses',
       'Dribbles', 'Corners Taken', 'Tackles', 'Clearances', 'Interceptions',
       'Clearances off line', 'Clean Sheets', 'Blocks', 'Goals conceded',
       'Saves made', 'Saves-to-shots ratio', 'Fouls Won', 'Fouls Conceded',
       'Offsides', 'Yellow Cards', 'Red Cards', 'Subs on', 'Subs off',
       'Players Used'],
      dtype='object')
Index method:  RangeIndex(start=0, stop=4622, step=1)
Data types for entire dataframe: 
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 35 columns):
Team                          16 non-null object
Goals                         16 non-null int64
Shots on 

### Get summaries on certain columns of interest

In [30]:
# The most frequently ordered item in general
c = chipo.groupby('item_name')
c = c.sum()
c = c.sort_values(['quantity'], ascending=False)
print("The most frequently order item was: ")
print (c.head(1))

# The most frequently ordered subchoice:
c = chipo.groupby('choice_description').sum()
c = c.sort_values(['quantity'], ascending=False)
print("\nThe most frequently ordered subchoice was: ")
print (c.head(1))

# Can summarize one column this way
total_items_orders = chipo.quantity.sum()
print("\nThe sum of the quanitity column is: ", str(total_items_orders))

The most frequently order item was: 
              order_id  quantity
item_name                       
Chicken Bowl    713926       761

The most frequently ordered subchoice was: 
                    order_id  quantity
choice_description                    
[Diet Coke]           123455       159

The sum of the quanitity column is:  4972


### Lambda functions

In [43]:
# Tiny function that gets rid of '$' and turns str into a float
dollarizer = lambda x: float(x[1:-1])

print ("Item price before being dollarized: \n", str(chipo.item_price[:4]))
chipo = chipo.assign(item_price2 = chipo.item_price.apply(dollarizer))
print ("\n Item price after being dollarized: \n", str(chipo.item_price2[:4]))

Item price before being dollarized: 
 0    $2.39 
1    $3.39 
2    $3.39 
3    $2.39 
Name: item_price, dtype: object

 Item price after being dollarized: 
 0    2.39
1    3.39
2    3.39
3    2.39
Name: item_price2, dtype: float64
