In [15]:
import numpy as np
import pandas as pd

from numpy.random import randint

# Seed NumPy's global random generator so the random sales figures are
# the same on every run of the notebook.
np.random.seed(321)

# Row labels: one entry per company.
companies = [
    "Jolly Good Toys Ltd",
    "Pristine Machines Inc",
    "Piggy Bank Corporation",
    "Cars & Gears Foundation",
]

# Column labels: one entry per sales region.
areas = [
    "Europe",
    "North_America",
    "South_America",
    "Asia",
    "Africa",
    "Other",
]

# One random integer in [0, 99999) for every company/region pair.
sales = randint(99999, size=(len(companies), len(areas)))

# Companies become the index (rows), regions become the columns.
df = pd.DataFrame(data=sales, index=companies, columns=areas)


In [16]:
# Display the freshly built sales table (companies as rows, regions as columns)
df

Unnamed: 0,Europe,North_America,South_America,Asia,Africa,Other
Jolly Good Toys Ltd,97268,6682,4220,43807,5929,91537
Pristine Machines Inc,24744,69018,99288,14068,90469,94536
Piggy Bank Corporation,28243,44245,21633,78619,45594,37389
Cars & Gears Foundation,57643,46518,71742,99722,48613,97335


In [17]:
# Select a single column by its label.
# Indexing with one label (not a list) returns a Series rather than a DataFrame;
# to_frame() converts that Series into a one-column DataFrame for display.
col_df = df["Europe"]
col_df.to_frame()

Unnamed: 0,Europe
Jolly Good Toys Ltd,97268
Pristine Machines Inc,24744
Piggy Bank Corporation,28243
Cars & Gears Foundation,57643


In [18]:
# Select two columns by passing a list of labels --> the result is a DataFrame
cols_df = df[["North_America", "South_America"]]
cols_df

Unnamed: 0,North_America,South_America
Jolly Good Toys Ltd,6682,4220
Pristine Machines Inc,69018,99288
Piggy Bank Corporation,44245,21633
Cars & Gears Foundation,46518,71742


In [19]:
# Show the column names
df.columns

Index(['Europe', 'North_America', 'South_America', 'Asia', 'Africa', 'Other'], dtype='object')

In [20]:
# Add a new column holding each company's combined sales across all regions.
# `areas` is the list of region column names the DataFrame was built with,
# so a row-wise sum (axis=1) over those columns replaces the long `+` chain.
# Note: sum() skips missing values, whereas `+` would propagate NaN.
df['TotalSales'] = df[areas].sum(axis=1)

In [21]:
# Display the DataFrame, now including the TotalSales column
df

Unnamed: 0,Europe,North_America,South_America,Asia,Africa,Other,TotalSales
Jolly Good Toys Ltd,97268,6682,4220,43807,5929,91537,249443
Pristine Machines Inc,24744,69018,99288,14068,90469,94536,392123
Piggy Bank Corporation,28243,44245,21633,78619,45594,37389,255723
Cars & Gears Foundation,57643,46518,71742,99722,48613,97335,421573


In [22]:
# New columns can also be derived with other arithmetic, e.g. division:
# divide TotalSales by 12 and keep two decimals.
df['MonthlySales'] = (df['TotalSales'] / 12).round(2)

In [23]:
# Display the DataFrame, now including the MonthlySales column
df

Unnamed: 0,Europe,North_America,South_America,Asia,Africa,Other,TotalSales,MonthlySales
Jolly Good Toys Ltd,97268,6682,4220,43807,5929,91537,249443,20786.92
Pristine Machines Inc,24744,69018,99288,14068,90469,94536,392123,32676.92
Piggy Bank Corporation,28243,44245,21633,78619,45594,37389,255723,21310.25
Cars & Gears Foundation,57643,46518,71742,99722,48613,97335,421573,35131.08


<h3>Deleting/dropping unneeded columns</h3>

In [24]:
# Pandas notebooks often begin by removing the columns that are not needed.
# drop() looks for ROW labels by default, and there is no row called
# 'MonthlySales', so target the column explicitly: columns=... is
# equivalent to passing the label together with axis=1.
df = df.drop(columns='MonthlySales')

In [25]:
# Display the DataFrame after dropping the MonthlySales column
df

Unnamed: 0,Europe,North_America,South_America,Asia,Africa,Other,TotalSales
Jolly Good Toys Ltd,97268,6682,4220,43807,5929,91537,249443
Pristine Machines Inc,24744,69018,99288,14068,90469,94536,392123
Piggy Bank Corporation,28243,44245,21633,78619,45594,37389,255723
Cars & Gears Foundation,57643,46518,71742,99722,48613,97335,421573


<b>Getting rows by index</b>

In [26]:
# Type 1: get a row by its index label with .loc
# The row comes back as a Series; to_frame() turns it into a one-column DataFrame
row = df.loc['Piggy Bank Corporation']
row.to_frame()

Unnamed: 0,Piggy Bank Corporation
Europe,28243
North_America,44245
South_America,21633
Asia,78619
Africa,45594
Other,37389
TotalSales,255723


In [27]:
# Type 2: get a row by its integer position with .iloc
# Positions are 0-based, so 3 selects the fourth row
row = df.iloc[3]
row.to_frame()

Unnamed: 0,Cars & Gears Foundation
Europe,57643
North_America,46518
South_America,71742
Asia,99722
Africa,48613
Other,97335
TotalSales,421573


In [29]:
# Display the full DataFrame again for reference
df

Unnamed: 0,Europe,North_America,South_America,Asia,Africa,Other,TotalSales
Jolly Good Toys Ltd,97268,6682,4220,43807,5929,91537,249443
Pristine Machines Inc,24744,69018,99288,14068,90469,94536,392123
Piggy Bank Corporation,28243,44245,21633,78619,45594,37389,255723
Cars & Gears Foundation,57643,46518,71742,99722,48613,97335,421573


In [28]:
# Filter by total sales: build a boolean mask and keep only the rows
# where TotalSales is above 300000
high_sales_mask = df['TotalSales'] > 300000
filter_df = df[high_sales_mask]
filter_df

Unnamed: 0,Europe,North_America,South_America,Asia,Africa,Other,TotalSales
Pristine Machines Inc,24744,69018,99288,14068,90469,94536,392123
Cars & Gears Foundation,57643,46518,71742,99722,48613,97335,421573


In [31]:
# Several conditions can be combined in one expression, with each
# condition wrapped in its own parentheses, for example:
#   df[(df['Europe'] > 30000) & (df['Asia'] > 20000)]
# but it's often easier to filter step by step, for example:

# step 1 --> keep only the companies with more than 25000 in European sales
europe_mask = df['Europe'] > 25000
df2 = df[europe_mask]
df2

Unnamed: 0,Europe,North_America,South_America,Asia,Africa,Other,TotalSales
Jolly Good Toys Ltd,97268,6682,4220,43807,5929,91537,249443
Piggy Bank Corporation,28243,44245,21633,78619,45594,37389,255723
Cars & Gears Foundation,57643,46518,71742,99722,48613,97335,421573


In [32]:
# The filtered result now lives in df2;
# from this point forward we keep narrowing df2 down.

# step 2 --> of the remaining companies, keep only those with more than
# 50000 in Asian sales
asia_mask = df2['Asia'] > 50000
df2 = df2[asia_mask]
df2

Unnamed: 0,Europe,North_America,South_America,Asia,Africa,Other,TotalSales
Piggy Bank Corporation,28243,44245,21633,78619,45594,37389,255723
Cars & Gears Foundation,57643,46518,71742,99722,48613,97335,421573


In [33]:
# final step --> of the remaining companies, keep only those with more than
# 40000 in Other sales
other_mask = df2['Other'] > 40000
df2 = df2[other_mask]
df2

Unnamed: 0,Europe,North_America,South_America,Asia,Africa,Other,TotalSales
Cars & Gears Foundation,57643,46518,71742,99722,48613,97335,421573


<h3>Using a custom function to modify or add new data</h3>

In [36]:
# Helper function to generate new column of data
def create_sales_category(row, small_limit=250000, large_limit=350000):
    """Classify a company as "Small", "Medium" or "Large" by its total sales.

    Parameters
    ----------
    row : mapping or pandas.Series
        Any object supporting ``row['TotalSales']``, for example a DataFrame
        row handed over by ``DataFrame.apply(..., axis=1)``.
    small_limit : number, default 250000
        Totals strictly below this value are labelled "Small".
    large_limit : number, default 350000
        Totals strictly above this value are labelled "Large".

    Returns
    -------
    str
        "Small" if ``TotalSales < small_limit``, "Medium" if
        ``small_limit <= TotalSales <= large_limit``, otherwise "Large".
    """
    total_sales = row['TotalSales']

    if total_sales < small_limit:
        return "Small"
    # Reaching this point already rules out the "Small" case, so only the
    # upper bound has to be checked for "Medium".
    if total_sales <= large_limit:
        return "Medium"
    return "Large"

In [37]:
# Use the helper function to create new data with pandas:
# apply() with axis=1 calls create_sales_category once per row, and the
# returned labels are stored in the new 'CompanySize' column
df['CompanySize'] = df.apply(create_sales_category, axis=1)

In [38]:
# Display the DataFrame, now including the CompanySize column
df

Unnamed: 0,Europe,North_America,South_America,Asia,Africa,Other,TotalSales,CompanySize
Jolly Good Toys Ltd,97268,6682,4220,43807,5929,91537,249443,Small
Pristine Machines Inc,24744,69018,99288,14068,90469,94536,392123,Large
Piggy Bank Corporation,28243,44245,21633,78619,45594,37389,255723,Medium
Cars & Gears Foundation,57643,46518,71742,99722,48613,97335,421573,Large
