In [1]:
import pandas as pd
# Six days of sample weather observations, stored column-wise:
# each key becomes a DataFrame column and each list holds that column's values.
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'],
    temperature=[32, 35, 28, 24, 32, 31],
    windspeed=[6, 7, 2, 7, 4, 2],
    event=['Rain', 'Sunny', 'Snow', 'Snow', 'Rain', 'Sunny'],
)
df = pd.DataFrame(data=weather_data)

In [2]:
# Display the complete frame; it has only six rows, so no truncation is needed.
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [3]:

# shape is a (rows, columns) tuple, so it can be unpacked: rows, columns = df.shape
df.shape

(6, 4)

In [4]:

# Column labels of the frame, returned as a pandas Index.
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [5]:
# Selecting a single column by name yields a Series rather than a DataFrame.
type(df['day'])

pandas.core.series.Series

In [6]:
# Passing a list of column names selects just those columns, in the order given.
df[['day','temperature']]

Unnamed: 0,day,temperature
0,1/1/2017,32
1,1/2/2017,35
2,1/3/2017,28
3,1/4/2017,24
4,1/5/2017,32
5,1/6/2017,31


In [7]:
# Highest value in the temperature column.
df['temperature'].max()

35

In [8]:
# Boolean-mask filtering: keep only the rows whose temperature exceeds 32.
df[df['temperature']>32]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [9]:

# Day(s) on which the maximum temperature was recorded -- the pandas
# counterpart of SQL's  SELECT day ... WHERE temperature = MAX(temperature).
# A single .loc[row_mask, column_label] lookup selects rows and column in one
# step, which avoids chained indexing (two separate [] lookups).
df.loc[df['temperature'] == df['temperature'].max(), 'day']

1    1/2/2017
Name: day, dtype: object

In [10]:
# Full row(s) whose temperature equals the column maximum; comparable to
# SQL's  SELECT * ... WHERE temperature = (SELECT MAX(temperature) ...).
df[df['temperature'] == df['temperature'].max()]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [11]:
# Sample standard deviation (N-1 in the denominator) of the temperature column.
df['temperature'].std()

3.8297084310253524

In [12]:
# max() also works on strings: it returns the lexicographically largest value.
# mean(), in contrast, won't work here because the column holds strings.
df['event'].max()

'Sunny'

In [13]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the string columns 'day' and 'event' are left out.
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


In [14]:
# Preview the frame with 'day' as the row index. The call returns a new frame
# and leaves `df` itself untouched.
df.set_index('day')

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [15]:
# Make 'day' the row index for the cells that follow. set_index returns a new
# frame, so rebind `df` to keep the result instead of mutating with inplace=True.
df = df.set_index('day')
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [16]:
# With 'day' set as the index, df.index holds the date strings.
df.index

Index(['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'], dtype='object', name='day')

In [17]:
# Label-based row lookup: .loc takes an index label, here one of the 'day' strings.
df.loc['1/2/2017']

temperature       35
windspeed          7
event          Sunny
Name: 1/2/2017, dtype: object

In [18]:
# Move 'day' back from the index into an ordinary column and restore the
# default integer index. reset_index returns a new frame, so rebind `df`
# rather than mutating with inplace=True.
df = df.reset_index()
df.head()

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain


### Different Ways of Creating a DataFrame

In [19]:
# df = pd.read_csv("weather_data.csv")

In [20]:
# df=pd.read_excel("weather_data.xlsx","Sheet1")


In [21]:
# Column-wise construction: each key is a column label and each list holds
# that column's values (three rows here).
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017'],
    temperature=[32, 35, 28],
    windspeed=[6, 7, 2],
    event=['Rain', 'Sunny', 'Snow'],
)
df = pd.DataFrame(data=weather_data)

In [22]:
# Row-wise construction: every tuple is one record, so the column labels
# have to be supplied separately through `columns`.
weather_data = [
    ('1/1/2017', 32, 6, 'Rain'),
    ('1/2/2017', 35, 7, 'Sunny'),
    ('1/3/2017', 28, 2, 'Snow'),
]
df = pd.DataFrame(
    weather_data,
    columns=['day', 'temperature', 'windspeed', 'event'],
)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow


In [23]:
# Record-wise construction: each dict is one row and carries its own column
# labels; `columns` selects which keys to use and fixes the column order.
weather_data = [
    dict(day='1/1/2017', temperature=32, windspeed=6, event='Rain'),
    dict(day='1/2/2017', temperature=35, windspeed=7, event='Sunny'),
    dict(day='1/3/2017', temperature=28, windspeed=2, event='Snow'),
]
df = pd.DataFrame(
    weather_data,
    columns=['day', 'temperature', 'windspeed', 'event'],
)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow


In [24]:
# df = pd.read_csv("stock_data.csv",  nrows=2) 
# nrows limits how many rows of the file are read

In [26]:
# Adding NaN where values may be missing
# df = pd.read_csv("stock_data.csv", na_values=["n.a.", "not available"])
# Every cell that matches one of the values in the na_values list is read as NaN

In [27]:
#df = pd.read_csv("stock_data.csv",  na_values={
#        'eps': ['not available'],
#        'revenue': [-1],
#       'people': ['not available','n.a.']
#    })

## Per-column NaN markers: each column gets its own list of values to treat as NaN

In [30]:
def convert_people_cell(cell):
    """Substitute a default name for the "n.a." placeholder.

    Intended as a per-cell converter for the 'people' column (see the
    commented-out read_excel call below).

    Args:
        cell: Raw cell value.

    Returns:
        'Sam Walton' when ``cell`` equals "n.a."; otherwise ``cell`` unchanged.
    """
    return 'Sam Walton' if cell == "n.a." else cell

def convert_price_cell(cell):
    """Substitute a default price for the "n.a." placeholder.

    Intended as a per-cell converter for the 'price' column (see the
    commented-out read_excel call below).

    Args:
        cell: Raw cell value.

    Returns:
        50 when ``cell`` equals "n.a."; otherwise ``cell`` unchanged.
    """
    return 50 if cell == "n.a." else cell
    
#df = pd.read_excel("stock_data.xlsx","Sheet1", converters= {
#        'people': convert_people_cell,
#        'price': convert_price_cell
#    })
    
#### converters maps a column name to a function that is applied to every cell of that column