In [4]:
# Dealing with rows and columns
import pandas as pd
# Load the weather data from the CSV file into a DataFrame.
df = pd.read_csv("weather_data.csv")

# Alternatively, build the DataFrame from a dict that maps each column name
# to the list of values for that column. Keys must be unique: a repeated key
# silently overwrites the earlier entry.
# weather_data = {
#     "day": ["1/1/2017", "1/2/2017"],
#     "temperature": [32, 35],
# }
# df = pd.DataFrame(weather_data)

In [5]:
# Display the whole DataFrame (it only has a handful of rows)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [6]:
# shape: the DataFrame's dimensions as a (rows, columns) tuple
df.shape

(6, 4)

In [8]:
# Row and column counts, taken from the two entries of df.shape
rows = df.shape[0]
columns = df.shape[1]

In [9]:
# Number of rows in the DataFrame
rows

6

In [10]:
# Number of columns in the DataFrame
columns

4

In [15]:
# First two rows of the DataFrame
# (for the last two rows use df.tail(2) instead)
df.head(2)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny


In [16]:
# The whole frame is simply df (or df[:]).
# Rows at positions 2 through 4: the end of the slice (5) is excluded.
df.iloc[2:5]

Unnamed: 0,day,temperature,windspeed,event
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain


In [19]:
# Column labels of the DataFrame. NOTE: this value is not shown, because a
# cell only displays its last expression; run it in its own cell to see it.
df.columns
# Contents of a single column, here 'day':
df.day # or df["day"]

0    1/1/2017
1    1/2/2017
2    1/3/2017
3    1/4/2017
4    1/5/2017
5    1/6/2017
Name: day, dtype: object

In [20]:
# Selecting a single column yields a pandas Series
type(df['event'])

pandas.core.series.Series

In [22]:
# Select several columns by passing a list of names; the result keeps the listed order
df[['event', 'day', 'temperature']]

Unnamed: 0,event,day,temperature
0,Rain,1/1/2017,32
1,Sunny,1/2/2017,35
2,Snow,1/3/2017,28
3,Snow,1/4/2017,24
4,Rain,1/5/2017,32
5,Sunny,1/6/2017,31


In [24]:
# Operations on a single column
# Standard deviation of temperature in the dataset:
# df['temperature'].std()
# Minimum temperature in the dataset:
# df['temperature'].min()
# Maximum temperature in the dataset:
df['temperature'].max()

35

In [25]:
# Print summary statistics for the numeric columns of the dataset
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


In [26]:
# Conditional selection: keep only the rows that satisfy a boolean condition,
# e.g. every row whose temperature is at least 32
df[df["temperature"] >= 32]

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
4,1/5/2017,32,4,Rain


In [28]:
# Print the entire row(s) on which the temperature reached its maximum
# (df.temperature is the equivalent attribute-style spelling)
df[df["temperature"] == df["temperature"].max()]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [30]:
# Print the 'day' and 'temperature' columns for the row(s) holding the maximum temperature.
# A single .loc call selects rows and columns together instead of chaining two [] lookups.
df.loc[df["temperature"] == df["temperature"].max(), ["day", "temperature"]]

Unnamed: 0,day,temperature
1,1/2/2017,35


In [31]:
# Inspect the current index of the DataFrame
df.index

RangeIndex(start=0, stop=6, step=1)

In [47]:
# Use the actual date (the 'day' column) as the index of the dataframe.
# set_index returns a new dataframe, so the result is assigned back to df
# rather than mutating the existing object in place.
# The guard keeps this cell safe to re-run: once 'day' is the index it is no
# longer a column, and a second set_index('day') would raise KeyError.
# To restore the original state: df = df.reset_index()
if df.index.name != 'day':
    df = df.set_index('day')

In [44]:
# Display the DataFrame again to see the effect of the index change
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [48]:
# Look up a single row by its index label (a date string)
df.loc['1/3/2017']

temperature      28
windspeed         2
event          Snow
Name: 1/3/2017, dtype: object