**Libraries used**

In [2]:
import pandas as pd #For handling Database
import numpy as np #Powerful tool for handling multi dimensional arrays

**Loading the Dataset into a Dataframe**

In [3]:
df = pd.read_csv('Train.csv') #Reads a comma-separated values (csv) file into a DataFrame.
# Here, 'Train.csv' is loaded into a DataFrame named 'df'

**Various ways of creating a Dataframe**

In [None]:
# Each example is bound to its own name so that 'df' (the Train.csv data
# loaded earlier) is not overwritten by these demonstration frames.

#To read .csv file
df_from_csv = pd.read_csv('Train.csv')

#To read .xlsx file
# NOTE(review): assumes 'Train.xlsx' exists next to the notebook and that an
# Excel engine (e.g. openpyxl) is installed -- confirm before running
df_from_excel = pd.read_excel('Train.xlsx')

#To build a DataFrame from a dictionary: keys become column names, lists become column values
weather_data = {
    'day': ['1/1/2017','1/2/2017','1/3/2017'],
    'temperature': [32,35,28],
    'windspeed': [6,7,2],
    'event': ['Rain', 'Sunny', 'Snow']
}
weather_df = pd.DataFrame(weather_data)

In [None]:
#df.shape is a (rows, columns) tuple giving the size of the DataFrame
df.shape
#OR unpack the tuple into two separate variables
rows, columns = df.shape

In [None]:
#Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns of the DataFrame
df.describe()  

In [None]:
#Prints a concise summary of the DataFrame: index range, column names, non-null counts and dtypes
df.info()

In [None]:
df.head()  #Returns the first 5 rows by default

df.head(10)  #Pass any integer n to get the first n rows (n = 10 here)

In [None]:
df.tail()  #Returns the last 5 rows by default

df.tail(10)  #Pass any integer n to get the last n rows (n = 10 here)

In [None]:
df #Displays the DataFrame as the cell output

In [None]:
df.columns #Returns the column labels as a pandas Index

In [None]:
df.Dates #Attribute access: returns the 'Dates' column as a Series
#OR bracket access, which also works for column names that contain spaces
df['Dates']

In [None]:
#Display only the listed columns; each column name must be its own string in the list
df[['Dates', 'Summary', 'Precip Type']]

In [None]:
#Highest value in the 'Temperature' column
df['Temperature'].max()

#Average of the 'Temperature' column
df['Temperature'].mean()

**Data Cleaning OR Data Munging**

In [None]:
#Substitute 0 for EVERY NaN in the DataFrame, modifying df itself
df.fillna(value=0, inplace=True)
#Average temperature after the fill
df['Temperature'].mean()

**Replace Values** 

In [132]:
#Replaces NaN by 0 in the Temperature column ONLY.
#The result is assigned back to the column: calling replace(..., inplace=True) on
#df['Temperature'] acts on a temporary object and is not guaranteed to update df.
df['Temperature'] = df['Temperature'].replace(to_replace = np.nan, value = 0)

In [None]:
#Number of missing (null) values in the 'Temperature' column
df['Temperature'].isna().sum()

In [None]:
#Sum of all values in the 'Temperature' column
df['Temperature'].sum()

In [5]:
#Number of non-null entries in the 'Summary' column
df['Summary'].count()

999

In [None]:
#Distinct values that occur in the 'Summary' column
df['Summary'].unique()

In [None]:
#How many times each distinct value occurs in the 'Summary' column
df['Summary'].value_counts()

In [None]:
#Full row(s) whose temperature equals the maximum temperature
df.loc[df['Temperature'] == df['Temperature'].max()]

In [None]:
#ONLY the dates of rows whose Summary is 'Mostly Cloudy' (row filter and column picked in one .loc call)
df.loc[df['Summary'] == 'Mostly Cloudy', 'Dates']

In [None]:
#Dates and temperature of rows whose Summary is 'Mostly Cloudy'
df.loc[df['Summary'] == 'Mostly Cloudy', ['Dates', 'Temperature']]

In [6]:
#Use the 'Dates' column as the index of the DataFrame
df = df.set_index('Dates')

In [None]:
df.loc['2017-03-21'] #Label-based lookup: returns the row(s) whose index label is '2017-03-21'

In [None]:
#Restore the default integer index.
#NOTE:- The current index is moved back into a regular column; pass drop=True to discard it instead
df = df.reset_index()

In [None]:
#Use the 'Summary' column as the index; its values repeat, so the index labels are not unique
df = df.set_index('Summary')

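**In general, index labels should be *UNIQUE*. pandas allows duplicate labels, but a lookup with `.loc` then returns every matching row instead of a single one.**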

In [None]:
df.loc['Mostly Cloudy'] #Label-based lookup on the index: returns the row(s) labelled 'Mostly Cloudy'

In [None]:
df.head() #Preview the first 5 rows to inspect the current index

**Rename a Column**

In [None]:
#Rename the 'Precip Type' column to 'Precipitation'; all other columns keep their names
df = df.rename(columns={'Precip Type': 'Precipitation'})

In [None]:
df.head() #Preview the first 5 rows to confirm the renamed column

**One Hot Encoding**

In [142]:
#An earlier cell made 'Summary' the index; get_dummies (and the later drop) need it as a regular column
if df.index.name == 'Summary':
    df = df.reset_index()
#Create a new DataFrame 'dummies' with one indicator column per unique value of 'Summary'
dummies = pd.get_dummies(df['Summary'])

In [None]:
#Place the indicator columns of 'dummies' side by side with the columns of df
df = pd.concat([df, dummies], axis=1)

In [None]:
#Remove the 'Summary' column, which is now represented by the indicator columns
df = df.drop(columns=['Summary'])
df

**Splitting using Pandas**

In [147]:
#Work on an independent (deep) copy so that the split below leaves df untouched
df1 = df.copy(deep=True)

In [153]:
#Randomly pick 75% of the rows for training; random_state fixes the draw so it is repeatable
train = df1.sample(random_state=1, frac=0.75)
#Rows whose index labels were not picked for training form the test set
test = df1.drop(index=train.index)

In [None]:
train #Displays the training subset

In [None]:
test.count() #Number of non-null values in each column of the test subset

**Splitting using Scikit-Learn**

In [None]:
 #Split features and target into train/test subsets with scikit-learn (the scikit-learn package must be installed)
from sklearn.model_selection import train_test_split

#NOTE(review): using 'Temperature' as the target is an illustrative choice -- replace it with the column you want to predict
X = df1.drop(columns=['Temperature'])  #Feature columns: everything except the target
y = df1['Temperature']                 #Target column

#test_size=0.33 holds out 33% of the rows for testing; random_state=42 makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)