## The Notebook Interface

In [None]:
# Write a fixed greeting to the cell's output area.
print("Hello World!")

In [None]:
import time
# Block the kernel for three seconds before the cell finishes
# (presumably so the cell's "running" state is visible long enough to notice).
time.sleep(3)

In [None]:
def say_hello(recipient):
    """Build a greeting addressed to ``recipient``.

    Args:
        recipient: Value to greet; it is inserted using its default string
            formatting.

    Returns:
        The string ``'Hello, <recipient>!'``.
    """
    greeting = f'Hello, {recipient}!'
    return greeting


# Last expression of the cell, so the notebook displays the returned string.
say_hello('Tim')

## Markdown

### This is a level 3 heading

This is some plain text that forms a paragraph. Add emphasis via **bold** and __bold__, or *italic* and _italic_. 

Paragraphs must be separated by an empty line. 

* Sometimes we want to include lists. 
* Which can be bulleted using asterisks. 

1. Lists can also be numbered. 
2. If we want an ordered list.

[It is possible to include hyperlinks](https://www.google.com/)

Inline code uses single backticks: `foo()`, and code blocks use triple backticks:
```
bar()
``` 
Or can be indented by 4 spaces: 

    foo()

And finally, adding images is easy:

<!-- Markdown equivalent: ![Alt text](https://books.disney.com/content/uploads/2022/08/915a2Dr5L.jpg) -->
<img src="https://books.disney.com/content/uploads/2022/08/915a2Dr5L.jpg" alt="Alt text" width="300" height="300">


## Kernels

In [None]:
import numpy as np
def square(x):
    """Return ``x`` multiplied by itself.

    Args:
        x: Any value that supports ``*`` with itself.

    Returns:
        The product ``x * x``.
    """
    product = x * x
    return product

In [None]:
# Draw a random integer from 1 up to (but not including) 10, square it with
# the helper from the previous cell, and report both values.
x = np.random.randint(low=1, high=10)
y = square(x)
print('%d squared is %d' % (x, y))

In [None]:
# Neither x nor y is assigned in this cell: the values printed are whatever
# the kernel currently holds from the cell that last set them.
print('Is %d squared %d?' % (x, y))

In [None]:
# Rebind y to a constant that is unrelated to x. Nothing recomputes it, so the
# question printed below no longer reflects square(x).
y = 10
# Same wording as the question in the previous cell (the stray second "is"
# made the sentence ungrammatical).
print('Is %d squared %d?' % (x, y))

## Example Analysis

In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
# Apply seaborn's "darkgrid" style to the figures drawn later in the notebook.
sns.set(style="darkgrid")

In [None]:
# Load the dataset; the path is relative, so 'fortune500.csv' must sit in the
# kernel's current working directory.
df = pd.read_csv('fortune500.csv')

In [None]:
df.head()  # preview the first rows of the data

In [None]:
df.tail()  # preview the last rows of the data

In [None]:
# Give the five columns short, lowercase names; later cells rely on them for
# attribute-style access (df.year, df.profit, ...).
df.columns = ['year', 'rank', 'company', 'revenue', 'profit']

In [None]:
len(df)  # number of rows in the dataset

In [None]:
# Show the dtype pandas assigned to each column.
df.dtypes

In [None]:
# Boolean mask: True where the profit text contains any character other than
# a digit, '.' or '-'.
non_numberic_profits = df['profit'].str.contains('[^0-9.-]')

# Preview the flagged rows to see which values keep the column from being numeric.
df[non_numberic_profits].head()

In [None]:
set(df.profit[non_numberic_profits])  # distinct profit values among the flagged rows

In [None]:
len(df.profit[non_numberic_profits])  # how many rows were flagged

In [None]:
# Count the flagged (non-numeric profit) rows per year.
# `bins` is a sequence of bin EDGES, and only the last bin is closed on the
# right. The data runs through 2005, so the edges must extend to 2006 to give
# 2005 a bin of its own; stopping the edges at 2005 would merge 2004 and 2005.
bin_sizes, _, _ = plt.hist(df.year[non_numberic_profits], bins=range(1955, 2007))

In [None]:
# Remove the rows flagged as having a non-numeric profit. The explicit .copy()
# makes the result an independent frame, so the column assignment below cannot
# trigger pandas' SettingWithCopyWarning.
df = df.loc[~non_numberic_profits].copy()

# Convert the remaining profit strings to numbers in a single vectorised call
# instead of invoking pd.to_numeric once per element.
df['profit'] = pd.to_numeric(df['profit'])

In [None]:
# Row count after the non-numeric profit rows were dropped.
len(df)

In [None]:
# Re-check the column dtypes after the profit conversion.
df.dtypes

### Plotting with Matplotlib

In [None]:
# Group the year/revenue/profit columns by year and take the per-year means.
# The year index (x) and the mean profit series (y1) feed the plots below.
group_by_year = df[['year', 'revenue', 'profit']].groupby('year')
avgs = group_by_year.mean()
x, y1 = avgs.index, avgs['profit']
def plot(x, y, ax, title, y_label):
    """Draw ``y`` against ``x`` as a line on ``ax`` and label the axes.

    Args:
        x: Values for the horizontal axis.
        y: Values for the vertical axis.
        ax: Axes-like object to draw on.
        title: Text for the axes title.
        y_label: Text for the vertical-axis label.
    """
    ax.plot(x, y)
    ax.set_ylabel(y_label)
    ax.set_title(title)
    # Zero margins: the data limits touch the edges of the axes.
    ax.margins(x=0, y=0)

In [None]:
# Mean profit per year on a fresh figure.
fig, ax = plt.subplots()
plot(
    x,
    y1,
    ax,
    title='Increase in mean Fortune 500 company profits from 1955 to 2005',
    y_label='Profit (millions)',
)

In [None]:
# Mean revenue per year on a fresh figure.
y2 = avgs['revenue']
fig, ax = plt.subplots()
plot(
    x,
    y2,
    ax,
    title='Increase in mean Fortune 500 company revenues from 1955 to 2005',
    y_label='Revenue (millions)',
)

In [None]:
def plot_with_std(x, y, stds, ax, title, y_label):
    """Plot ``y`` against ``x`` with a shaded band of ``y - stds`` to ``y + stds``.

    Args:
        x: Values for the horizontal axis.
        y: Centre line values; must support ``-`` and ``+`` with ``stds``.
        stds: Half-width of the shaded band at each point.
        ax: Axes to draw on.
        title: Text for the axes title.
        y_label: Text for the vertical-axis label.
    """
    lower_band = y - stds
    upper_band = y + stds
    ax.fill_between(x, lower_band, upper_band, alpha=0.2)
    # Draw the line and labels with the shared helper.
    plot(x, y, ax, title, y_label)
# Side-by-side panels: mean profit and mean revenue per year, each with a
# band of one standard deviation either side.
fig, (ax1, ax2) = plt.subplots(ncols=2)
title = 'Increase in mean and std Fortune 500 company %s from 1955 to 2005'

# Compute the per-year standard deviations once and reuse them for both
# panels instead of running the grouped aggregation twice.
yearly_std = group_by_year.std()
stds1 = yearly_std.profit.values
stds2 = yearly_std.revenue.values

plot_with_std(x, y1.values, stds1, ax1, title % 'profits', 'Profit (millions)')
plot_with_std(x, y2.values, stds2, ax2, title % 'revenues', 'Revenue (millions)')

# Widen the figure so the two panels are readable, then tidy the spacing.
fig.set_size_inches(14, 4)
fig.tight_layout()