## Introduction to the Data

In [1]:
import pandas as pd
import numpy as np

In [5]:
# Load f500.csv and use its 'company' column as the row index.
# set_index() is chained instead of called with inplace=True, so the indexed
# frame is produced by a single expression and bound to f500 exactly once.
f500 = pd.read_csv('f500.csv').set_index('company')

In [7]:
# Keep the first ten rows around for a quick look at the data.
f500_head = f500.head(n=10)
# Print the index range, column names, non-null counts and dtypes.
f500.info()

<class 'pandas.core.frame.DataFrame'>
Index: 500 entries, Walmart to AutoNation
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   rank                      500 non-null    int64  
 1   revenues                  500 non-null    int64  
 2   revenue_change            498 non-null    float64
 3   profits                   499 non-null    float64
 4   assets                    500 non-null    int64  
 5   profit_change             436 non-null    float64
 6   ceo                       500 non-null    object 
 7   industry                  500 non-null    object 
 8   sector                    500 non-null    object 
 9   previous_rank             500 non-null    int64  
 10  country                   500 non-null    object 
 11  hq_location               500 non-null    object 
 12  website                   500 non-null    object 
 13  years_on_global_500_list  500 non-null    int64  
 14  em

## Vectorized Operations

In [8]:
# Element-wise difference previous_rank - rank; a positive value means the
# current rank number is smaller than the previous one.
rank_change = f500['previous_rank'].sub(f500['rank'])

In [12]:
# Group the rank changes into 10 equal-width intervals and count the values
# falling into each one (most populated interval first).
rank_change.value_counts(bins=10)

(-64.4, 8.2]          318
(8.2, 80.8]           111
(-137.0, -64.4]        24
(-500.727, -427.4]     19
(153.4, 226.0]          7
(80.8, 153.4]           7
(-209.6, -137.0]        6
(-427.4, -354.8]        5
(-282.2, -209.6]        2
(-354.8, -282.2]        1
dtype: int64

## Series Data Exploration Methods

In [13]:
# Largest value in rank_change.
rank_change_max = rank_change.max()
rank_change_max

226

In [14]:
# Smallest value in rank_change.
# NOTE(review): previous_rank still contains 0 entries at this point, which
# presumably explains the extreme minimum — confirm before interpreting it.
rank_change_min = rank_change.min()
rank_change_min

-500

## Series Describe Method

In [16]:
# Summary statistics (count, mean, std, min, quartiles, max) of the rank column.
rank = f500['rank']
rank_desc = rank.describe()
# Only the final expression of a cell is displayed, so show the summary here.
rank_desc

count    500.000000
mean     250.500000
std      144.481833
min        1.000000
25%      125.750000
50%      250.500000
75%      375.250000
max      500.000000
Name: rank, dtype: float64

In [17]:
# Summary statistics of the previous_rank column, for comparison with rank.
prev_rank = f500['previous_rank']
prev_rank_desc = prev_rank.describe()
prev_rank_desc

count    500.000000
mean     222.134000
std      146.941961
min        0.000000
25%       92.750000
50%      219.500000
75%      347.250000
max      500.000000
Name: previous_rank, dtype: float64

## Method Chaining

In [19]:
# Count the rows whose previous_rank equals 0.
# The comparison yields a boolean Series; chaining .sum() on it counts the True
# values with a vectorised reduction rather than a Python-level loop.
zero_previous_rank = (f500['previous_rank'] == 0).sum()
zero_previous_rank

33

## Dataframe Exploration Methods

![Axis parameter diagram](./axis_param.svg)

In [23]:
# Column-wise maximum after dropping every object-dtype column.
max_f500 = f500.select_dtypes(exclude='object').max()

In [24]:
# Alternative form: let max() restrict itself to numeric columns.
# This rebinds max_f500, replacing the value assigned in the preceding cell.
max_f500 = f500.max(numeric_only=True)

## Dataframe Describe Method

In [25]:
# Summary statistics for the numeric columns of f500, one column per statistic set.
f500.describe()

Unnamed: 0,rank,revenues,revenue_change,profits,assets,profit_change,previous_rank,years_on_global_500_list,employees,total_stockholder_equity
count,500.0,500.0,498.0,499.0,500.0,436.0,500.0,500.0,500.0,500.0
mean,250.5,55416.358,4.538353,3055.203206,243632.3,24.152752,222.134,15.036,133998.3,30628.076
std,144.481833,45725.478963,28.549067,5171.981071,485193.7,437.509566,146.941961,7.932752,170087.8,43642.576833
min,1.0,21609.0,-67.3,-13038.0,3717.0,-793.7,0.0,1.0,328.0,-59909.0
25%,125.75,29003.0,-5.9,556.95,36588.5,-22.775,92.75,7.0,42932.5,7553.75
50%,250.5,40236.0,0.55,1761.6,73261.5,-0.35,219.5,17.0,92910.5,15809.5
75%,375.25,63926.75,6.975,3954.0,180564.0,17.7,347.25,23.0,168917.2,37828.5
max,500.0,485873.0,442.3,45687.0,3473238.0,8909.5,500.0,23.0,2300000.0,301893.0


## Assignment with pandas

In [27]:
# Overwrite a single value: the 'ceo' column of the row labelled 'Dow Chemical'.
f500.loc['Dow Chemical', 'ceo'] = 'Jim Fitterling'

## Using Boolean Indexing with pandas Objects

In [28]:
# Boolean Series that is True for rows whose industry is "Motor Vehicles and Parts".
motor_bool = f500['industry'].eq("Motor Vehicles and Parts")

In [31]:
# Row filter and column pick done in one .loc lookup: the country of every
# row flagged by motor_bool.
motor_countries = f500.loc[motor_bool, 'country']

## Using Boolean Arrays to Assign Values

In [32]:
# Five most frequent previous_rank values, with missing values counted too
# (dropna=False), captured before any zeros are replaced.
prev_rank_before = f500["previous_rank"].value_counts(dropna=False).head()
prev_rank_before

0      33
159     1
147     1
148     1
149     1
Name: previous_rank, dtype: int64

In [46]:
# previous_rank is loaded as int64, which cannot hold NaN. Cast it to float
# explicitly so the assignment below does not depend on pandas silently
# changing the column dtype (newer pandas versions warn about that).
f500["previous_rank"] = f500["previous_rank"].astype(float)
# Boolean-mask assignment: every previous_rank of 0 becomes NaN.
f500.loc[f500["previous_rank"] == 0, "previous_rank"] = np.nan

In [47]:
# Same frequency snapshot after the replacement; dropna=False keeps NaN as its
# own entry in the counts.
prev_rank_after = (
    f500["previous_rank"]
    .value_counts(dropna=False)
    .head()
)
prev_rank_after

NaN      33
471.0     1
234.0     1
125.0     1
166.0     1
Name: previous_rank, dtype: int64

## Creating New Columns

In [49]:
# Store the difference previous_rank - rank as a new column of f500.
f500['rank_change'] = f500['previous_rank'].sub(f500['rank'])

# Summarise the new column; describe() leaves NaN rows out of its count.
rank_change_desc = f500['rank_change'].describe()

In [50]:
# Display the rank_change summary computed in the preceding cell.
rank_change_desc

count    467.000000
mean      -3.533191
std       44.293603
min     -199.000000
25%      -21.000000
50%       -2.000000
75%       10.000000
max      226.000000
Name: rank_change, dtype: float64

## Challenge: Top Performers by Country

Create a series, industry_usa, containing counts of the two most common values in the industry column for companies headquartered in the USA.

In [62]:
# Counts of the two most frequent industry values among rows whose country is 'USA'.
industry_usa = (
    f500.loc[f500['country'] == 'USA', 'industry']
    .value_counts()
    .head(2)
)

Create a series, sector_china, containing counts of the three most common values in the sector column for companies headquartered in China.

In [61]:
# Counts of the three most frequent sector values among rows whose country is 'China'.
sector_china = (
    f500.loc[f500['country'] == 'China', 'sector']
    .value_counts()
    .head(3)
)