# **Please Note:** This notebook was created by following a tutorial.

In [None]:
# importing numpy, pandas, matplotlib, seaborn modules.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the Pokemon CSV file into a DataFrame named `data`.
data = pd.read_csv(filepath_or_buffer="../input/pokemon-challenge/pokemon.csv")

In [None]:
# Print pandas' concise summary of the DataFrame (columns, non-null counts, dtypes).
data.info()

In [None]:
# Pairwise correlation between the columns.
# Only numeric/boolean columns are kept: since pandas 2.0 DataFrame.corr()
# no longer drops text columns silently and raises on them instead.
data.select_dtypes(include=["number", "bool"]).corr()

In [None]:
# visualising a correlation map

f, ax = plt.subplots(figsize=(18, 18))
# Only numeric/boolean columns can be correlated; since pandas 2.0
# DataFrame.corr() raises on text columns instead of dropping them.
sns.heatmap(
    data.select_dtypes(include=["number", "bool"]).corr(),
    annot=True, linewidths=.5, fmt='.1f', ax=ax,
)
plt.show()

# figsize=(18,18) enlarges the figure; without it the heatmap uses the default size.
# annot = whether to write the numerical value inside each cell
# linewidths = width of the lines between the cells
# fmt = number format of the annotations ('.1f' = one digit after the point)

In [None]:
# first 10 rows of the DataFrame
data.head(n=10)

In [None]:
# last 10 rows of the DataFrame
data.tail(n=10)

In [None]:
# column labels of the DataFrame
data.columns

# every feature (column) is listed.

In [None]:
# Line plot of Defense and Speed drawn on the same axes

data['Defense'].plot(
    kind='line', label='Defense', color='r', linestyle='-.',
    linewidth=1, alpha=0.5, grid=True,
)
data['Speed'].plot(
    kind='line', label='Speed', color='g', linestyle=':',
    linewidth=1, alpha=0.5, grid=True,
)
plt.legend(loc='upper right')  # legend built from each line's label
plt.xlabel('x axis')           # caption of the x axis
plt.ylabel('y axis')           # caption of the y axis
plt.title('Line Plot')         # heading of the plot
plt.show()

In [None]:
# Scatter plot: Attack on the x axis against Defense on the y axis

data.plot(kind='scatter', x='Attack', y='Defense', alpha=0.5, color='red')
plt.xlabel('Attack')
plt.ylabel('Defense')  # spelled like the column name and the title
plt.title('Attack & Defense Scatter Plot')
plt.show()  # render the figure instead of echoing the Text(...) repr of the title

In [None]:
# Histogram of Speed: 45 bins on a 10x10 figure

data['Speed'].plot(figsize=(10, 10), kind='hist', bins=45)
plt.show()

In [None]:
# plt.clf() = clears the current figure; called right after the plotting
# code, it wipes what was just drawn

data.Speed.plot(kind='hist', bins=50)
plt.clf()

# as seen, the histogram is not shown

In [None]:
# one column selected by name -> Series
series = data.Attack
# a list of column names -> DataFrame (here with a single column)
data_frame = data.loc[:, ['Defense']]
print(data)

In [None]:
# the 'Attack' column selected as a Series
print(series)

In [None]:
# the 'Defense' column selected as a one-column DataFrame
print(data_frame)

In [None]:
# Filtering rows with a boolean mask

x = data.Attack.gt(180)  # True for every row whose Attack exceeds 180
data[x]

# Only 2 rows (pokemon) have Attack > 180

In [None]:
# both conditions must hold for a row to be kept (element-wise 'and')
data[(data['Defense'] < 30) & (data['Attack'] > 50)]

In [None]:
# demonstrating .value_counts(): how often each 'Type 1' value occurs
# dropna=False also counts missing values (NaN)

print(data['Type 1'].value_counts(dropna =False))

In [None]:
# pokemon with very little HP (below 20)

data.loc[data.HP < 20]

In [None]:
# pokemon flagged as legendary

data.loc[data.Legendary.eq(True)]

In [None]:
# for each column: True when every value in that column is truthy
data.all()

In [None]:
# summary statistics of the numeric columns
# (count, mean, std, min, quartiles, max)

data.describe()

In [None]:
# Here,
# 50% = median
# 25% = Q1 = lower quartile = 'median' of (min,median)
# 75% = Q3 = upper quartile = 'median' of (median,max)

# 'median' can be used when outliers make 'mean' deceiving.

# IQR = Interquartile Range = Q3-Q1

# threshold for diagnosing outliers: value < Q1-(1.5)*IQR  - OR - value > Q3+(1.5)*IQR => value = outlier  

In [None]:
# box plot of Attack, one box per value of Legendary

data.boxplot('Attack', by='Legendary')

In [None]:
# demonstrating melt: Attack and Defense are stacked into
# 'variable'/'value' pairs, with Name kept as the identifier column

melted_data = data.melt(id_vars='Name', value_vars=['Attack', 'Defense'])
melted_data.head(n=5)

In [None]:
# using .pivot(), essentially reversing .melt()

# .pivot() returns a new DataFrame, so the result is kept and displayed;
# previously it was discarded and the melted frame was shown again.
pivoted_data = melted_data.pivot(index='Name', columns='variable', values='value')
pivoted_data.head()

In [None]:
# stacking rows: the first 5 rows of data are appended below data itself

data2 = data.head(n=5)
# axis 0 (rows) is pd.concat's default; ignore_index renumbers the result 0..n-1
concat_data_vert = pd.concat((data, data2), ignore_index=True)
concat_data_vert.tail(n=11)

In [None]:
# placing frames side by side: data2's columns are added to the right of data's

data2 = data.head(n=5)
concat_data_horiz = pd.concat(objs=(data, data2), axis=1)
concat_data_horiz.head(n=10)

In [None]:
# checking for null values: True wherever a cell is missing

data.isna()

In [None]:
# we can see there are null values on the bottom right
# this is because concat_data_horiz contains data2, which only had 5 rows,
# so data2's columns are NaN in every row after the first 5

concat_data_horiz.isnull()

In [None]:
# the current dtype of every column (feature):

data.dtypes

In [None]:
# .astype converts a column to another dtype

for column, new_type in (('Type 1', 'category'), ('Speed', 'float')):
    data[column] = data[column].astype(new_type)

In [None]:
# the column dtypes again, after the .astype conversions

data.dtypes

In [None]:
# how many rows are there for each 'Type 2' value?
# dropna=False makes the count INCLUDE NaN

data["Type 2"].value_counts(dropna =False)


In [None]:
# we drop the rows whose "Type 2" is NaN
# and keep the result as a new DataFrame, data1

# DataFrame.dropna(subset=...) returns a new frame without those rows.
# The previous `data1 = data` only aliased the same object, and
# dropna(inplace=True) on a selected column never removes rows from the frame.
data1 = data.dropna(subset=["Type 2"])

In [None]:
# Attack, Defense and Speed drawn as lines on one graph

data1 = data[["Attack", "Defense", "Speed"]]
data1.plot()

In [None]:
# to avoid confusion, we can use subplots:
# subplots=True draws each column on its own axes

data1.plot(subplots = True)
plt.show()

# plt.show() also keeps the <AxesSubplot:> repr, seen in the
# output of the previous cell, out of this cell's output

In [None]:
# Attack against Speed as a scatter plot, to eyeball their correlation

data1.plot.scatter(x="Attack", y="Speed")

In [None]:
# frequency histogram of Speed: 25 bins covering 0..200

data1.plot.hist(y="Speed", bins=25, range=(0, 200))

In [None]:
# same histogram, normalized
# meaning, the total area of the bars sums up to 1

data1.plot(kind="hist", y="Speed", bins=25, range=(0, 200), density=True)


# note: density=True used to be normed=True in older matplotlib versions

In [None]:
# Defense histogram (top) and its cumulative version (bottom)

fig, axes = plt.subplots(nrows=2, ncols=1)
data1.plot(kind="hist", y="Defense", bins=50, range=(0, 250), density=True, ax=axes[0])
data1.plot(kind="hist", y="Defense", bins=50, range=(0, 250), density=True, ax=axes[1], cumulative=True)
plt.show()  # a bare `plt` only echoes the module object; show() renders the figure

# the second histogram is our cumulative distribution function, or 'cdf'

In [None]:
# display the DataFrame as it currently stands
data

In [None]:
# Any COLUMN / FEATURE can become the index; here 'Legendary' is used

data = data.set_index(keys="Legendary")
data.head(n=5)

In [None]:
# First Attack value, selected by position.
# The index holds the 'Legendary' values rather than integers, so a bare [0]
# relies on pandas' deprecated positional fallback; .iloc makes it explicit.
data["Attack"].iloc[0]

In [None]:
# Second Attack value, selected by position.
# The index holds the 'Legendary' values rather than integers, so a bare [1]
# relies on pandas' deprecated positional fallback; .iloc makes it explicit.
data.Attack.iloc[1]

In [None]:
# keep every row, but only the listed columns

data.loc[:, ["#", "Attack", "HP", "Type 2"]]

In [None]:
# a single column selected by name comes back as a Series
print(type(data.HP))

# a list of column names comes back as a DataFrame
print(type(data.loc[:, ["HP"]]))

In [None]:
# display data1 as it currently stands
data1

In [None]:
# rows labelled 10 down to 1 (reverse order), columns Attack through Defense;
# .loc slices include both end labels
data1.loc[slice(10, 1, -1), slice("Attack", "Defense")]

In [None]:
# rows labelled 5 down to 1 (reverse order), only the Speed column;
# slicing from "Speed" to "Speed" keeps the result a DataFrame
data1.loc[slice(5, 1, -1), slice("Speed", "Speed")]

In [None]:
# rows labelled 1 through 10, columns from Defense to the last one

data1.loc[slice(1, 10), slice("Defense", None)]

In [None]:
# `boolean` is a Series of True/False values, one per row

boolean = data["HP"] > 200
data[boolean]

In [None]:
# pokemon with HP > 100 and Defense < 50

high_hp = data["HP"] > 100
low_def = data["Defense"] < 50
data[high_hp & low_def]

In [None]:
# pokemon with HP < 25 or HP > 100
# (high_hp, i.e. data.HP > 100, is defined in the previous cell)

low_hp = data.HP < 25
data[high_hp | low_hp]

In [None]:
# rows named "Jigglypuff" or whose Defense equals 45
is_jigglypuff = data["Name"] == "Jigglypuff"
has_defense_45 = data['Defense'] == 45
specifically_filtered = data[is_jigglypuff | has_defense_45]
print(specifically_filtered)

In [None]:
def div_by_45(n):
    """Return ``n`` divided by 45 (true division)."""
    quotient = n / 45
    return quotient
# apply div_by_45 to every HP value
much_smaller_HP = data["HP"].apply(div_by_45)

In [None]:
# the HP column after div_by_45 was applied to every value
much_smaller_HP

In [None]:
# the same division written inline with a lambda
data["HP"].apply(lambda hp: hp / 45)

In [None]:
# new column: element-wise sum of Attack and Defense

data["total_power"] = data["Attack"] + data["Defense"]
data.head(n=5)

In [None]:
# give the index a name; it is shown as the index header in the output
data.index.name = "my_index_name"
data.head()

In [None]:
# Replace the index with consecutive integers starting at -1.
# The stop value is derived from len(data), so the assignment fits any number
# of rows; the hard-coded range(-1, 799) only fit exactly 800 rows.
data.index = range(-1, len(data) - 1)
data.head()

In [None]:
# two-level index: 'Type 1' is the outer level, 'Type 2' the inner level
data1 = data.set_index(keys=["Type 1", "Type 2"])
data1.head(n=100)

In [None]:
# the same two columns as index, with the levels swapped:
# 'Type 2' is the outer level, 'Type 1' the inner level
data1 = data.set_index(keys=["Type 2", "Type 1"])
data1

And that's about it. I'm pretty new, and this is my first public project on Kaggle. Happy coding!