## Weather Dataset

The Weather Dataset is a time-series data set with per-hour information about weather conditions at a particular location. It records temperature, dew point temperature, relative humidity, wind speed, visibility, pressure and conditions.


In [2]:
import pandas as pd


### Import the Data

In [3]:
# Read the weather records from the CSV file (path is relative to the
# notebook's working directory) and display the resulting DataFrame.
data = pd.read_csv('WeatherData.csv')
data

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather
0,1/1/2012 0:00,-1.8,-3.9,86,4,8.0,101.24,Fog
1,1/1/2012 1:00,-1.8,-3.7,87,4,8.0,101.24,Fog
2,1/1/2012 2:00,-1.8,-3.4,89,7,4.0,101.26,"Freezing Drizzle,Fog"
3,1/1/2012 3:00,-1.5,-3.2,88,6,4.0,101.27,"Freezing Drizzle,Fog"
4,1/1/2012 4:00,-1.5,-3.3,88,7,4.8,101.23,Fog
...,...,...,...,...,...,...,...,...
8779,12/31/2012 19:00,0.1,-2.7,81,30,9.7,100.13,Snow
8780,12/31/2012 20:00,0.2,-2.4,83,24,9.7,100.03,Snow
8781,12/31/2012 21:00,-0.5,-1.5,93,28,4.8,99.95,Snow
8782,12/31/2012 22:00,-0.2,-1.8,89,28,9.7,99.91,Snow


### Analyze the DataFrame

In [4]:
# Preview the top of the frame; n=5 is head()'s default, spelled out here
data.head(n=5)

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather
0,1/1/2012 0:00,-1.8,-3.9,86,4,8.0,101.24,Fog
1,1/1/2012 1:00,-1.8,-3.7,87,4,8.0,101.24,Fog
2,1/1/2012 2:00,-1.8,-3.4,89,7,4.0,101.26,"Freezing Drizzle,Fog"
3,1/1/2012 3:00,-1.5,-3.2,88,6,4.0,101.27,"Freezing Drizzle,Fog"
4,1/1/2012 4:00,-1.5,-3.3,88,7,4.8,101.23,Fog


In [6]:
# Shape of the dataframe as a (number of rows, number of columns) tuple
data.shape

(8784, 8)

In [7]:
# The row index of the dataframe (its row labels)
data.index

RangeIndex(start=0, stop=8784, step=1)

In [8]:
# The column labels of the dataframe
data.columns

Index(['Date/Time', 'Temp_C', 'Dew Point Temp_C', 'Rel Hum_%',
       'Wind Speed_km/h', 'Visibility_km', 'Press_kPa', 'Weather'],
      dtype='object')

In [9]:
# The data type of each column ('object' here means the column holds text)
data.dtypes

Date/Time            object
Temp_C              float64
Dew Point Temp_C    float64
Rel Hum_%             int64
Wind Speed_km/h       int64
Visibility_km       float64
Press_kPa           float64
Weather              object
dtype: object

In [10]:
# Shows all distinct values of one column; unique() is a Series method,
# so a single column must be selected first (it is NOT available on the dataframe)
data['Dew Point Temp_C'].unique()

array([ -3.9,  -3.7,  -3.4,  -3.2,  -3.3,  -3.1,  -3.6,  -2.3,  -2.1,
        -2. ,  -1.7,  -1.1,  -0.4,  -0.2,   0. ,   1. ,   1.3,   1.7,
         1.9,   2. ,   1.5,  -0.9,  -1.5,  -2.6,  -2.9,  -4.1,  -3.5,
        -6.2,  -6.5,  -6.8,  -7. ,  -8.7,  -9.5, -11.4, -12.1, -13.4,
       -12.8, -14.7, -14.1, -16. , -17.2, -15.8, -18.7, -20.1, -19.1,
       -19.3, -19.5, -21.3, -21.9, -22.2, -22.6, -22.4, -22.9, -23.2,
       -23.8, -24.8, -25.4, -24.6, -24.2, -24.1, -24. , -22.5, -20.6,
       -21.1, -21.7, -19. , -16.3, -15.5, -13.2, -12.6, -12.7, -11.6,
       -11.7, -10.2,  -8.3,  -7.7, -10.7,  -9.7,  -9.1, -10.1, -10. ,
        -9.6, -12. , -14.4, -15.4, -15.7, -14.8, -16.8, -17.3, -16.5,
       -16.1, -16.2, -16.7, -15.6, -14.3, -13.6, -13. , -12.5, -12.2,
       -11.1, -10.8, -10.5,  -9. ,  -8. ,  -7.8,  -7.2,  -6.9,  -6.1,
        -5.9,  -5.4,  -5.3,  -5. ,  -3. ,  -4.6,  -9.2, -10.3, -11.9,
       -12.4, -12.9, -13.3, -13.8, -13.9, -13.7, -14. , -14.5, -11. ,
        -6.6,  -5.8,

In [11]:
# Number of distinct values per column; nunique() works on a single column
# OR on the whole dataframe (as done here)
data.nunique()

Date/Time           8784
Temp_C               533
Dew Point Temp_C     489
Rel Hum_%             83
Wind Speed_km/h       34
Visibility_km         24
Press_kPa            518
Weather               50
dtype: int64

In [13]:
# Number of non-null values in each column (works on a single column OR the dataframe)
data.count()

Date/Time           8784
Temp_C              8784
Dew Point Temp_C    8784
Rel Hum_%           8784
Wind Speed_km/h     8784
Visibility_km       8784
Press_kPa           8784
Weather             8784
dtype: int64

In [14]:
# Shows every distinct value of a single column together with how often it
# occurs, most frequent first
data['Weather'].value_counts()

Mainly Clear                               2106
Mostly Cloudy                              2069
Cloudy                                     1728
Clear                                      1326
Snow                                        390
Rain                                        306
Rain Showers                                188
Fog                                         150
Rain,Fog                                    116
Drizzle,Fog                                  80
Snow Showers                                 60
Drizzle                                      41
Snow,Fog                                     37
Snow,Blowing Snow                            19
Rain,Snow                                    18
Thunderstorms,Rain Showers                   16
Haze                                         16
Drizzle,Snow,Fog                             15
Freezing Rain                                14
Freezing Drizzle,Snow                        11
Freezing Drizzle                        

In [15]:
# Prints a summary of the dataframe: index range, column names,
# non-null counts, dtypes and memory usage
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8784 entries, 0 to 8783
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Date/Time         8784 non-null   object 
 1   Temp_C            8784 non-null   float64
 2   Dew Point Temp_C  8784 non-null   float64
 3   Rel Hum_%         8784 non-null   int64  
 4   Wind Speed_km/h   8784 non-null   int64  
 5   Visibility_km     8784 non-null   float64
 6   Press_kPa         8784 non-null   float64
 7   Weather           8784 non-null   object 
dtypes: float64(4), int64(2), object(2)
memory usage: 549.1+ KB


### Find all the unique 'Wind Speed' Values in the data

In [16]:
# Distinct wind-speed readings, in order of first appearance
data.loc[:, 'Wind Speed_km/h'].unique()

array([ 4,  7,  6,  9, 15, 13, 20, 22, 19, 24, 30, 35, 39, 32, 33, 26, 44,
       43, 48, 37, 28, 17, 11,  0, 83, 70, 57, 46, 41, 52, 50, 63, 54,  2],
      dtype=int64)

In [17]:
# How many distinct wind-speed readings there are
data.loc[:, 'Wind Speed_km/h'].nunique()

34

### Find the number of times when the 'Weather' is exactly 'Clear'

In [18]:
# Three ways to answer this: value_counts, boolean filtering, or groupby.

# value_counts: read the count off the 'Clear' row of the result
data['Weather'].value_counts()

Mainly Clear                               2106
Mostly Cloudy                              2069
Cloudy                                     1728
Clear                                      1326
Snow                                        390
Rain                                        306
Rain Showers                                188
Fog                                         150
Rain,Fog                                    116
Drizzle,Fog                                  80
Snow Showers                                 60
Drizzle                                      41
Snow,Fog                                     37
Snow,Blowing Snow                            19
Rain,Snow                                    18
Thunderstorms,Rain Showers                   16
Haze                                         16
Drizzle,Snow,Fog                             15
Freezing Rain                                14
Freezing Drizzle,Snow                        11
Freezing Drizzle                        

In [21]:
# boolean filtering: keep only the rows whose Weather equals 'Clear' exactly
data.loc[data['Weather'].eq('Clear')]

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather
67,1/3/2012 19:00,-16.9,-24.8,50,24,25.0,101.74,Clear
114,1/5/2012 18:00,-7.1,-14.4,56,11,25.0,100.71,Clear
115,1/5/2012 19:00,-9.2,-15.4,61,7,25.0,100.80,Clear
116,1/5/2012 20:00,-9.8,-15.7,62,9,25.0,100.83,Clear
117,1/5/2012 21:00,-9.0,-14.8,63,13,25.0,100.83,Clear
...,...,...,...,...,...,...,...,...
8646,12/26/2012 6:00,-13.4,-14.8,89,4,25.0,102.47,Clear
8698,12/28/2012 10:00,-6.1,-8.6,82,19,24.1,101.27,Clear
8713,12/29/2012 1:00,-11.9,-13.6,87,11,25.0,101.31,Clear
8714,12/29/2012 2:00,-11.8,-13.1,90,13,25.0,101.33,Clear


In [23]:
# groupby: split the rows by Weather value, then pull out the 'Clear' group
data.groupby('Weather').get_group('Clear')

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather
67,1/3/2012 19:00,-16.9,-24.8,50,24,25.0,101.74,Clear
114,1/5/2012 18:00,-7.1,-14.4,56,11,25.0,100.71,Clear
115,1/5/2012 19:00,-9.2,-15.4,61,7,25.0,100.80,Clear
116,1/5/2012 20:00,-9.8,-15.7,62,9,25.0,100.83,Clear
117,1/5/2012 21:00,-9.0,-14.8,63,13,25.0,100.83,Clear
...,...,...,...,...,...,...,...,...
8646,12/26/2012 6:00,-13.4,-14.8,89,4,25.0,102.47,Clear
8698,12/28/2012 10:00,-6.1,-8.6,82,19,24.1,101.27,Clear
8713,12/29/2012 1:00,-11.9,-13.6,87,11,25.0,101.31,Clear
8714,12/29/2012 2:00,-11.8,-13.1,90,13,25.0,101.33,Clear


### Find the number of times when the 'Wind Speed' was exactly 4 KM/H

In [27]:
# boolean filtering: rows where the wind speed is exactly 4 km/h
data.loc[data['Wind Speed_km/h'].eq(4)]

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather
0,1/1/2012 0:00,-1.8,-3.9,86,4,8.0,101.24,Fog
1,1/1/2012 1:00,-1.8,-3.7,87,4,8.0,101.24,Fog
96,1/5/2012 0:00,-8.8,-11.7,79,4,9.7,100.32,Snow
101,1/5/2012 5:00,-7.0,-9.5,82,4,4.0,100.19,Snow
146,1/7/2012 2:00,-8.1,-11.1,79,4,19.3,100.15,Cloudy
...,...,...,...,...,...,...,...,...
8768,12/31/2012 8:00,-8.6,-10.3,87,4,3.2,101.14,Snow Showers
8769,12/31/2012 9:00,-8.1,-9.6,89,4,2.4,101.09,Snow
8770,12/31/2012 10:00,-7.4,-8.9,89,4,6.4,101.05,"Snow,Fog"
8772,12/31/2012 12:00,-5.8,-7.5,88,4,12.9,100.78,Snow


### Find out all the null values in the data set

In [29]:
# isnull() alone returns a boolean frame the same shape as the data, which is
# hard to read at a glance; summing it gives the number of missing values
# in each column instead
data.isnull().sum()

Date/Time           0
Temp_C              0
Dew Point Temp_C    0
Rel Hum_%           0
Wind Speed_km/h     0
Visibility_km       0
Press_kPa           0
Weather             0
dtype: int64

In [30]:
# can also use notnull() and sum() for the opposite result but the same conclusion
data.notnull().sum()

Date/Time           8784
Temp_C              8784
Dew Point Temp_C    8784
Rel Hum_%           8784
Wind Speed_km/h     8784
Visibility_km       8784
Press_kPa           8784
Weather             8784
dtype: int64

### Rename the column name 'Weather' of the dataframe to 'Weather Condition'


In [33]:
# rename(columns={old_name: new_name}) returns a renamed copy and leaves the
# original frame untouched, so the result is assigned back to `data` to make
# the new column name stick for the cells below.
data = data.rename(columns={'Weather': 'Weather Condition'})

data

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather Condition
0,1/1/2012 0:00,-1.8,-3.9,86,4,8.0,101.24,Fog
1,1/1/2012 1:00,-1.8,-3.7,87,4,8.0,101.24,Fog
2,1/1/2012 2:00,-1.8,-3.4,89,7,4.0,101.26,"Freezing Drizzle,Fog"
3,1/1/2012 3:00,-1.5,-3.2,88,6,4.0,101.27,"Freezing Drizzle,Fog"
4,1/1/2012 4:00,-1.5,-3.3,88,7,4.8,101.23,Fog
...,...,...,...,...,...,...,...,...
8779,12/31/2012 19:00,0.1,-2.7,81,30,9.7,100.13,Snow
8780,12/31/2012 20:00,0.2,-2.4,83,24,9.7,100.03,Snow
8781,12/31/2012 21:00,-0.5,-1.5,93,28,4.8,99.95,Snow
8782,12/31/2012 22:00,-0.2,-1.8,89,28,9.7,99.91,Snow


### What is the mean 'Visibility'?

In [36]:
# Arithmetic mean of the visibility column
data['Visibility_km'].mean()

27.66444672131151

### What is the Standard Deviation of 'Pressure' in this data?

In [37]:
# Standard deviation of the pressure column (bracket access, like the other cells)
data['Press_kPa'].std()

0.8440047459486474

### What is the Variance of 'Relative Humidity' in this data?

In [38]:
# Variance of the relative-humidity column (pandas' var() defaults to ddof=1, i.e. the sample variance)
data['Rel Hum_%'].var()

286.2485501984998

### Find all the instances when 'Snow' was recorded

In [39]:
# boolean filtering: rows whose condition is exactly 'Snow'
data.loc[data['Weather Condition'].eq('Snow')]

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather Condition
55,1/3/2012 7:00,-14.0,-19.5,63,19,25.0,100.95,Snow
84,1/4/2012 12:00,-13.7,-21.7,51,11,24.1,101.25,Snow
86,1/4/2012 14:00,-11.3,-19.0,53,7,19.3,100.97,Snow
87,1/4/2012 15:00,-10.2,-16.3,61,11,9.7,100.89,Snow
88,1/4/2012 16:00,-9.4,-15.5,61,13,19.3,100.79,Snow
...,...,...,...,...,...,...,...,...
8779,12/31/2012 19:00,0.1,-2.7,81,30,9.7,100.13,Snow
8780,12/31/2012 20:00,0.2,-2.4,83,24,9.7,100.03,Snow
8781,12/31/2012 21:00,-0.5,-1.5,93,28,4.8,99.95,Snow
8782,12/31/2012 22:00,-0.2,-1.8,89,28,9.7,99.91,Snow


In [43]:
# value_counts gives only the number of rows per condition (no row details),
# which is still good for a quick overview
data['Weather Condition'].value_counts()

Mainly Clear                               2106
Mostly Cloudy                              2069
Cloudy                                     1728
Clear                                      1326
Snow                                        390
Rain                                        306
Rain Showers                                188
Fog                                         150
Rain,Fog                                    116
Drizzle,Fog                                  80
Snow Showers                                 60
Drizzle                                      41
Snow,Fog                                     37
Snow,Blowing Snow                            19
Rain,Snow                                    18
Thunderstorms,Rain Showers                   16
Haze                                         16
Drizzle,Snow,Fog                             15
Freezing Rain                                14
Freezing Drizzle,Snow                        11
Freezing Drizzle                        

In [46]:
# consider all rows whose condition contains 'Snow' anywhere in the text
# (e.g. 'Snow Showers', 'Snow,Fog'), not only exact matches
data[data['Weather Condition'].str.contains('Snow')]

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather Condition
41,1/2/2012 17:00,-2.1,-9.5,57,22,25.0,99.66,Snow Showers
44,1/2/2012 20:00,-5.6,-13.4,54,24,25.0,100.07,Snow Showers
45,1/2/2012 21:00,-5.8,-12.8,58,26,25.0,100.15,Snow Showers
47,1/2/2012 23:00,-7.4,-14.1,59,17,19.3,100.27,Snow Showers
48,1/3/2012 0:00,-9.0,-16.0,57,28,25.0,100.35,Snow Showers
...,...,...,...,...,...,...,...,...
8779,12/31/2012 19:00,0.1,-2.7,81,30,9.7,100.13,Snow
8780,12/31/2012 20:00,0.2,-2.4,83,24,9.7,100.03,Snow
8781,12/31/2012 21:00,-0.5,-1.5,93,28,4.8,99.95,Snow
8782,12/31/2012 22:00,-0.2,-1.8,89,28,9.7,99.91,Snow


### Find all the instances when 'Wind Speed' is above 24 and 'Visibility' is 25

In [54]:
# Combine two boolean masks with & (element-wise AND): both must hold
data.loc[data['Wind Speed_km/h'].gt(24) & data['Visibility_km'].eq(25)]

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather Condition
23,1/1/2012 23:00,5.3,2.0,79,30,25.0,99.31,Cloudy
24,1/2/2012 0:00,5.2,1.5,77,35,25.0,99.26,Rain Showers
25,1/2/2012 1:00,4.6,0.0,72,39,25.0,99.26,Cloudy
26,1/2/2012 2:00,3.9,-0.9,71,32,25.0,99.26,Mostly Cloudy
27,1/2/2012 3:00,3.7,-1.5,69,33,25.0,99.30,Mostly Cloudy
...,...,...,...,...,...,...,...,...
8705,12/28/2012 17:00,-8.6,-12.0,76,26,25.0,101.34,Mainly Clear
8753,12/30/2012 17:00,-12.1,-15.8,74,28,25.0,101.26,Mainly Clear
8755,12/30/2012 19:00,-13.4,-16.5,77,26,25.0,101.47,Mainly Clear
8759,12/30/2012 23:00,-12.1,-15.1,78,28,25.0,101.52,Mostly Cloudy


### What is the mean value of each column against each 'Weather Condition'

In [56]:
# Group by weather condition and average every numeric column, then sort the
# result by mean temperature (coldest first) with sort_values().
# numeric_only=True skips the text 'Date/Time' column, which cannot be
# averaged (pandas 2.x raises a TypeError for it instead of dropping it).
data.groupby('Weather Condition').mean(numeric_only=True).sort_values('Temp_C', ascending=True)

Unnamed: 0_level_0,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa
Weather Condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Snow Showers,Fog",-10.675,-11.9,90.75,13.75,7.025,101.2925
Freezing Fog,-7.575,-9.25,87.75,4.75,0.65,102.32
Freezing Drizzle,-5.657143,-8.0,83.571429,16.571429,9.2,100.202857
Moderate Snow,-5.525,-7.25,87.75,33.75,0.75,100.275
"Moderate Snow,Blowing Snow",-5.45,-6.5,92.5,40.0,0.6,100.57
"Freezing Drizzle,Haze",-5.433333,-8.0,82.0,10.333333,2.666667,100.316667
"Snow,Blowing Snow",-5.410526,-7.621053,84.473684,34.842105,4.105263,99.704737
"Freezing Drizzle,Snow",-5.109091,-7.072727,86.090909,16.272727,5.872727,100.520909
"Snow,Fog",-5.075676,-6.364865,90.675676,17.324324,4.537838,100.688649
"Freezing Rain,Snow Grains",-5.0,-7.3,84.0,32.0,4.8,98.56


### What is the Minimum and Maximum Value of each column against each weather condition

In [58]:
# Per-condition minimum of every column, sorted by the minimum temperature.
# NOTE: 'Date/Time' is stored as text (dtype object), so its "minimum" is the
# lexicographically smallest string, not the earliest timestamp.
data.groupby('Weather Condition').min().sort_values('Temp_C')

Unnamed: 0_level_0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa
Weather Condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Clear,1/11/2012 1:00,-23.3,-28.5,20,0,11.3,99.52
Mostly Cloudy,1/1/2012 16:00,-23.2,-28.5,18,0,11.3,98.36
Mainly Clear,1/10/2012 11:00,-22.8,-28.0,20,0,12.9,98.67
Cloudy,1/1/2012 17:00,-21.4,-26.8,18,0,11.3,98.39
Freezing Fog,1/22/2012 6:00,-19.0,-22.9,71,0,0.2,101.97
Snow,1/10/2012 1:00,-16.7,-24.6,41,0,1.0,97.75
Fog,1/1/2012 0:00,-16.0,-17.2,80,0,0.2,98.31
Snow Showers,1/12/2012 7:00,-13.3,-19.3,52,0,2.4,99.49
"Snow,Blowing Snow",1/13/2012 21:00,-12.0,-16.2,70,24,0.6,98.11
Haze,1/22/2012 12:00,-11.5,-16.0,68,0,4.8,100.35


In [62]:
# Per-condition maximum of every column, sorted by the maximum temperature.
# NOTE: 'Date/Time' is stored as text (dtype object), so its "maximum" is the
# lexicographically largest string, not the latest timestamp.
data.groupby('Weather Condition').max().sort_values('Temp_C')

Unnamed: 0_level_0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa
Weather Condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Snow Showers,Fog",12/29/2012 13:00,-10.0,-11.1,92,22,9.7,102.52
"Moderate Snow,Blowing Snow",12/27/2012 12:00,-5.4,-6.4,93,41,0.6,100.64
"Freezing Rain,Snow Grains",1/13/2012 9:00,-5.0,-7.3,84,32,4.8,98.56
"Freezing Drizzle,Haze",2/1/2012 13:00,-5.0,-7.7,83,11,4.0,100.36
"Freezing Rain,Haze",2/1/2012 15:00,-4.9,-7.4,83,9,2.8,100.41
Moderate Snow,12/27/2012 9:00,-4.9,-6.7,93,39,0.8,100.67
"Snow,Haze",2/1/2012 21:00,-3.6,-6.4,81,15,6.4,100.99
"Freezing Drizzle,Snow",3/2/2012 12:00,-3.3,-4.6,94,24,12.9,101.18
"Freezing Rain,Ice Pellets,Fog",12/17/2012 3:00,-2.6,-3.7,92,28,8.0,100.95
Freezing Drizzle,2/1/2012 5:00,-2.3,-3.3,93,26,12.9,101.02


### Show all the records where weather condition is fog

In [64]:
# substring match: any condition that mentions 'Fog' (e.g. 'Snow,Fog')
data.loc[data['Weather Condition'].str.contains('Fog')]

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather Condition
0,1/1/2012 0:00,-1.8,-3.9,86,4,8.0,101.24,Fog
1,1/1/2012 1:00,-1.8,-3.7,87,4,8.0,101.24,Fog
2,1/1/2012 2:00,-1.8,-3.4,89,7,4.0,101.26,"Freezing Drizzle,Fog"
3,1/1/2012 3:00,-1.5,-3.2,88,6,4.0,101.27,"Freezing Drizzle,Fog"
4,1/1/2012 4:00,-1.5,-3.3,88,7,4.8,101.23,Fog
...,...,...,...,...,...,...,...,...
8739,12/30/2012 3:00,-9.1,-10.4,90,11,3.6,100.30,"Snow,Fog"
8740,12/30/2012 4:00,-9.3,-10.6,90,13,9.7,100.28,"Snow,Fog"
8741,12/30/2012 5:00,-9.1,-10.4,90,11,4.0,100.32,"Snow,Fog"
8742,12/30/2012 6:00,-9.3,-10.8,89,17,8.0,100.39,"Snow,Fog"


In [66]:
# exact match: rows where the condition is 'Fog' and nothing else
data.loc[data['Weather Condition'].eq('Fog')]

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather Condition
0,1/1/2012 0:00,-1.8,-3.9,86,4,8.0,101.24,Fog
1,1/1/2012 1:00,-1.8,-3.7,87,4,8.0,101.24,Fog
4,1/1/2012 4:00,-1.5,-3.3,88,7,4.8,101.23,Fog
5,1/1/2012 5:00,-1.4,-3.3,87,9,6.4,101.27,Fog
6,1/1/2012 6:00,-1.5,-3.1,89,7,6.4,101.29,Fog
...,...,...,...,...,...,...,...,...
8716,12/29/2012 4:00,-16.0,-17.2,90,6,9.7,101.25,Fog
8717,12/29/2012 5:00,-14.8,-15.9,91,4,6.4,101.25,Fog
8718,12/29/2012 6:00,-13.8,-15.3,88,4,9.7,101.25,Fog
8719,12/29/2012 7:00,-14.8,-16.4,88,7,8.0,101.22,Fog


### Find all instances when 'Weather' is clear or 'Visibility' is above 40

In [72]:
# use the or operator (|) to keep rows that satisfy either condition
# NOTE(review): str.contains('Clear') also matches 'Mainly Clear'; switch to
# == 'Clear' if only exact matches are wanted — confirm intent.
# tail(40) limits the display to the last 40 matching rows.
data[(data['Weather Condition'].str.contains('Clear')) | (data['Visibility_km'] > 40)].tail(40)

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather Condition
8639,12/25/2012 23:00,-10.4,-12.7,83,11,25.0,102.45,Clear
8640,12/26/2012 0:00,-11.8,-13.5,87,4,25.0,102.41,Clear
8641,12/26/2012 1:00,-11.2,-12.9,87,6,25.0,102.42,Clear
8642,12/26/2012 2:00,-12.7,-14.4,87,4,25.0,102.45,Clear
8643,12/26/2012 3:00,-14.2,-15.8,88,6,25.0,102.52,Clear
8644,12/26/2012 4:00,-13.1,-14.7,88,6,25.0,102.55,Clear
8645,12/26/2012 5:00,-12.7,-14.1,89,4,25.0,102.48,Clear
8646,12/26/2012 6:00,-13.4,-14.8,89,4,25.0,102.47,Clear
8647,12/26/2012 7:00,-13.0,-14.4,89,7,25.0,102.46,Mainly Clear
8650,12/26/2012 10:00,-11.0,-13.2,84,15,24.1,102.54,Mainly Clear


### Find all instances where 'Weather' is 'Clear' and 'Relative Humidity' is greater than 50
### Or 'Visibility' is above 40

In [80]:
# (exactly 'Clear' AND humidity above 50) OR visibility above 40
data.loc[
    (data['Weather Condition'].eq('Clear') & data['Rel Hum_%'].gt(50))
    | data['Visibility_km'].gt(40)
]

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather Condition
106,1/5/2012 10:00,-6.0,-10.0,73,17,48.3,100.45,Mainly Clear
107,1/5/2012 11:00,-5.6,-10.2,70,22,48.3,100.41,Mainly Clear
108,1/5/2012 12:00,-4.7,-9.6,69,20,48.3,100.38,Mainly Clear
109,1/5/2012 13:00,-4.4,-9.7,66,26,48.3,100.40,Mainly Clear
110,1/5/2012 14:00,-5.1,-10.7,65,22,48.3,100.46,Mainly Clear
...,...,...,...,...,...,...,...,...
8749,12/30/2012 13:00,-12.4,-16.2,73,37,48.3,100.92,Mostly Cloudy
8750,12/30/2012 14:00,-11.8,-16.1,70,37,48.3,100.96,Mainly Clear
8751,12/30/2012 15:00,-11.3,-15.6,70,32,48.3,101.05,Mainly Clear
8752,12/30/2012 16:00,-11.4,-15.5,72,26,48.3,101.15,Mainly Clear
