# Part01

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the data set into a DataFrame, using the first CSV column as the row
# labels, then clear the index name so it is not rendered as an extra header.
f500 = pd.read_csv("f500.csv", index_col=0)
f500.index.name = None

# Replace 0 values in the "previous_rank" column with NaN.
# Series.mask builds a new, upcast column instead of writing NaN into the
# existing one in place; an in-place write of NaN into a column that
# read_csv inferred as integer is an incompatible-dtype setitem, which
# recent pandas versions deprecate.
f500["previous_rank"] = f500["previous_rank"].mask(f500["previous_rank"] == 0, np.nan)

# Three-column preview of the first five rows, displayed in the next cell.
f500_selection = f500[['rank', 'revenues', 'revenue_change']].head()

In [3]:
# Display the five-row, three-column preview (rank, revenues, revenue_change).
f500_selection

Unnamed: 0,rank,revenues,revenue_change
Walmart,1,485873,0.8
State Grid,2,315199,-4.4
Sinopec Group,3,267518,-9.1
China National Petroleum,4,262573,-12.3
Toyota Motor,5,254694,7.7


![csv](figs/csv_to_dataframe.svg)

# Part02

In [4]:
# Reload the data set with the default integer index, so the company name
# becomes an ordinary column instead of the row labels.
f500 = pd.read_csv('f500.csv')

# Treat a previous_rank of 0 as missing. Series.mask returns a new, upcast
# column rather than writing NaN into the existing column in place; the
# in-place write is an incompatible-dtype setitem that recent pandas versions
# deprecate when read_csv has inferred an integer dtype for the column.
f500['previous_rank'] = f500['previous_rank'].mask(f500['previous_rank'] == 0, np.nan)

# Preview the first five rows of three columns.
f500[['company', 'rank', 'revenues']].head()

Unnamed: 0,company,rank,revenues
0,Walmart,1,485873
1,State Grid,2,315199
2,Sinopec Group,3,267518
3,China National Petroleum,4,262573
4,Toyota Motor,5,254694


![Selection loc](figs/selection_loc.svg)


![Selection iloc](figs/selection_iloc.svg)

# Part03

In [5]:
# Purely positional lookups with iloc.
# Single cell: first row, first column.
company_value = f500.iloc[0, 0]
# Whole row: integer position 4 is the fifth row; the result is a Series
# whose index is the column names.
fifth_row = f500.iloc[4]

print(fifth_row)

company                                     Toyota Motor
rank                                                   5
revenues                                          254694
revenue_change                                       7.7
profits                                          16899.3
assets                                            437575
profit_change                                      -12.3
ceo                                          Akio Toyoda
industry                        Motor Vehicles and Parts
sector                            Motor Vehicles & Parts
previous_rank                                        8.0
country                                            Japan
hq_location                                Toyota, Japan
website                     http://www.toyota-global.com
years_on_global_500_list                              23
employees                                         364445
total_stockholder_equity                          157210
Name: 4, dtype: object


In [6]:
# Show the value read from the first row and first column of f500.
print(company_value)

Walmart


# Part04

In [7]:
# Positional slices: the end of an iloc slice is exclusive.
# Rows at positions 0, 1 and 2, all columns.
first_three_rows = f500.iloc[0:3]
# Rows at positions 0 and 6 (first and seventh), first five columns.
first_seventh_row_slice = f500.iloc[[0, 6], 0:5]

# Part05

In [8]:
# Boolean Series: True where revenue_change is missing.
rev_is_null = f500["revenue_change"].isna()
print(rev_is_null.head(5))

0    False
1    False
2    False
3    False
4    False
Name: revenue_change, dtype: bool


In [9]:
# Use the boolean mask to keep only the rows with a missing revenue_change,
# then show the identifying columns for those rows.
rev_change_null = f500.loc[rev_is_null]
print(rev_change_null.loc[:, ["company", "country", "sector"]])

                        company  country      sector
90                       Uniper  Germany      Energy
180  Hewlett Packard Enterprise      USA  Technology


In [10]:
# Mask of rows with no previous rank, built once and reused below.
prev_rank_is_null = f500['previous_rank'].isna()

# Rows and columns selected in a single .loc call: masked rows, three columns.
null_previous_rank = f500.loc[prev_rank_is_null, ['company', 'rank', 'previous_rank']]

# Count how many rows are missing (True) versus present (False).
print(prev_rank_is_null.value_counts())

False    467
True      33
Name: previous_rank, dtype: int64


# Part06

In [11]:
# All columns for the rows whose previous_rank is missing.
null_previous_rank = f500.loc[f500["previous_rank"].isna()]


In [12]:
# Preview: the row labels are kept from f500, so they are not consecutive.
null_previous_rank.loc[:, ['company', 'rank', 'previous_rank']].head(5)

Unnamed: 0,company,rank,previous_rank
48,Legal & General Group,49,
90,Uniper,91,
123,Dell Technologies,124,
138,Anbang Insurance Group,139,
140,Albertsons Cos.,141,


In [13]:
# First five rows by position; the original f500 row labels are retained.
top5_null_prev_rank = null_previous_rank.head(5)
top5_null_prev_rank

Unnamed: 0,company,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
48,Legal & General Group,49,105235,442.3,1697.9,577954,3.4,Nigel Wilson,"Insurance: Life, Health (stock)",Financials,,Britain,"London, Britain",http://www.legalandgeneralgroup.com,17,8939,8579
90,Uniper,91,74407,,-3557.5,51541,,Klaus Schafer,Energy,Energy,,Germany,"Dusseldorf, Germany",http://www.uniper.energy,1,12890,12889
123,Dell Technologies,124,64806,18.1,-1672.0,118206,,Michael S. Dell,"Computers, Office Equipment",Technology,,USA,"Round Rock, TX",http://www.delltechnologies.com,17,138000,13243
138,Anbang Insurance Group,139,60800,124.0,3883.9,430040,0.9,Wu Xiaohui,"Insurance: Life, Health (Mutual)",Financials,,China,"Beijing, China",http://www.anbanggroup.com,1,40707,20372
140,Albertsons Cos.,141,59678,1.6,-373.3,23755,,Robert G. Miller,Food and Drug Stores,Food & Drug Stores,,USA,"Boise, ID",http://www.albertsons.com,13,273000,1371


# Part07

In [14]:
# Restrict to companies that have a previous rank.
previously_ranked = f500.loc[f500['previous_rank'].notna()]
# Previous rank minus current rank for each remaining row.
rank_change = previously_ranked['previous_rank'].sub(previously_ranked['rank'])
print(rank_change.shape)
print(rank_change.tail(3))
# %%


(467,)
496   -70.0
497   -61.0
498   -32.0
dtype: float64


In [15]:
# Attach the rank differences as a new column. rank_change was computed only
# from rows with a non-null previous_rank; the assignment aligns on the index,
# so the remaining rows of f500 get NaN in this column.
f500['rank_change'] = rank_change

# Part08

In [16]:
# Two independent boolean masks over f500 ...
large_revenue = f500['revenues'].gt(100_000)
negative_profits = f500['profits'].lt(0)
# ... combined element-wise: a row must satisfy both to be kept.
combined = large_revenue & negative_profits
big_rev_neg_profit = f500.loc[combined]

# Part09

In [17]:
# Companies headquartered in either Brazil or Venezuela.
brazil_venezuela = f500.loc[f500['country'].isin(['Brazil', 'Venezuela'])]

# %%
# First five Technology-sector companies whose country is anything but USA.
tech_outside_usa = f500.loc[(f500['sector'] == 'Technology') & ~(f500['country'] == 'USA')].head(5)

# Part10

Let's continue by answering more complex questions about our data set. Suppose we wanted to find the company that employs the most people in China. We can accomplish this by first selecting all of the rows where the `country` column equals `China`:

```python
selected_rows = f500[f500["country"] == "China"]
```

In [18]:
# Name of the Japanese company with the most employees: filter to Japan,
# order by employee count (largest first) and take the top row. The company
# is read by column label rather than by column position, so the lookup does
# not depend on 'company' being the first column of f500.
top_japanese_employer = (
    f500[f500['country'] == 'Japan']
    .sort_values('employees', ascending=False)
    .iloc[0]['company']
)

# Part11

In [19]:
# Map each distinct country to the company with the most employees there.
countries = f500['country'].unique()
top_employer_by_country = {}

for country in countries:
    # Rows for this country, largest employer first.
    in_country = f500.loc[f500['country'] == country]
    selected_row = in_country.sort_values('employees', ascending=False).iloc[0]
    top_employer_by_country[country] = selected_row['company']


# Part12

In [20]:
# step 1: return on assets (profits divided by assets) for every company
f500['roa'] = f500['profits'] / f500['assets']

# step 2: for each sector, record the company with the highest ROA
top_roa_by_sector = {}
sectors = f500['sector']
# Iterate over the distinct sector values only. Looping over the raw column
# would repeat the same filter-and-sort once per row and merely rewrite
# identical dictionary entries.
for sector in sectors.unique():
    selected_row = f500[f500['sector'] == sector].sort_values('roa', ascending=False).iloc[0]
    top_roa_by_sector[sector] = selected_row['company']