# DATA SCIENTIST
 * Data Visualization and Data Science Examples

# Oğuzhan Özer
<font color='black'>Content</font>

* [Loading Data Set](#1)
* [Introduction to Python](#2)
     * [Matplotlib](#3)
* [Cleaning Data](#4)    
    * [Box Plot](#5)
    * [Melt Data](#6)
    * [Pivot Data](#7)
    * [Concatenating Data](#8)
    * [Change Data Types](#9)
    * [Missing Data](#10)
* [Pandas](#11)
    * [Visual Exploratory Data Analysis](#12)
* [Manipulating Data Frames with Pandas](#13)
    * [Indexing Data Frames](#14)
    * [Slicing Data Frames](#15)
    * [Filtering Data Frames](#16)
* [Index Objects And Labeled Data](#17)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed.
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

# Input data files are available in the "../input/" directory.
# For example, running this cell (by clicking run or pressing Shift+Enter) lists the files in the input directory.

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.
# `!ls` is an IPython shell escape that lists the current working directory.
!ls

<a id="1"></a> <br>
**Loading Data Set & Explanation Of Data**

In [None]:
# Load the Pokemon CSV from the input directory into the DataFrame used by every later cell.
pokemonData = pd.read_csv("../input/pokemon.csv")

In [None]:
# Print a summary of the frame: columns, non-null counts and dtypes.
pokemonData.info()

In [None]:
# Preview the first 10 rows.
pokemonData.head(10)

In [None]:
# Pairwise correlation table.
# The frame also holds text columns (e.g. Name); recent pandas releases raise
# on those instead of silently skipping them, so only the numeric and boolean
# columns are correlated.
pokemonData.select_dtypes(include=[np.number, "bool"]).corr()

In [None]:
# Annotated heatmap of the correlation between data features.
f, ax = plt.subplots(figsize=(18, 18))
# Correlate only numeric/boolean columns: the frame also holds text columns
# (e.g. Name), on which recent pandas releases raise instead of skipping them.
sns.heatmap(
    pokemonData.select_dtypes(include=[np.number, "bool"]).corr(),
    annot=True,
    linewidths=.5,
    fmt='.1f',
    ax=ax,
)
# plt.show() renders the figure and keeps the returned object's repr out of the cell output.
plt.show()

In [None]:
# List the column labels of the frame.
pokemonData.columns

<a id="2"></a> <br>
**Introduction to Python**

<a id="3"></a> <br>
**Matplotlib Part**

In [None]:
# Line plot of Speed and Defense against the row index, drawn on the same axes.
pokemonData.Speed.plot(kind='line', color='y', label='Speed', linewidth=1, alpha=1, grid=True, linestyle='-.')
pokemonData.Defense.plot(kind='line', color='r', label='Defense', linewidth=1, alpha=1, grid=True, linestyle='-.')
plt.legend(loc='upper right')
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.title('Line plot')
# End with plt.show() so the cell output is the figure rather than the
# repr of the Text object returned by plt.title().
plt.show()

In [None]:
# Scatter plot of Attack (x) against Defense (y).
pokemonData.plot(kind='scatter', x='Attack', y='Defense', alpha=0.5, color='green')
plt.xlabel('Attack')
# Axis label spelled like the plotted column and the title below.
plt.ylabel('Defense')
plt.title("Attack and Defense Correlation Scatter Plot")
# End with plt.show() so the cell output is the figure rather than the
# repr of the Text object returned by plt.title().
plt.show()

In [None]:
# Histogram of the Defense column: 50 bins on a 20x10 figure.
pokemonData["Defense"].plot.hist(bins=50, figsize=(20, 10))

In [None]:
# Boolean mask that is True where Attack exceeds 150, then the rows it selects.
# NOTE: despite its name, `defence` is computed from the Attack column.
defence = pokemonData['Attack'].gt(150)
pokemonData.loc[defence]

In [None]:
# Rows whose Defense is above 200 and whose Attack is above 50.
pokemonData[(pokemonData['Defense'] > 200) & (pokemonData['Attack'] > 50)]

In [None]:
# Walk over the first row of the one-column Attack frame and print its
# label together with the row contents.
for rowLabel, rowValues in pokemonData[['Attack']].iloc[:1].iterrows():
    print(rowLabel, " : ", rowValues)

In [None]:
# Label every row "high" or "low" depending on whether its Speed is above the mean.
# Series.mean() and np.where do the work in vectorised form instead of a
# Python-level sum and a per-row list comprehension (mean() also skips
# missing values rather than propagating them).
avgForSpeed = pokemonData.Speed.mean()
pokemonData["Speed_Levels"] = np.where(pokemonData.Speed > avgForSpeed, "high", "low")
# Show the new column next to Speed for index labels up to and including 15.
pokemonData.loc[:15, ["Speed", "Speed_Levels"]]

<a id="4"></a> <br>
**Cleaning Data**

In [None]:
# (number of rows, number of columns) of the frame.
pokemonData.shape

In [None]:
# Frequency of each "Type 2" value; dropna=False also counts the missing entries.
print(pokemonData["Type 2"].value_counts(dropna=False))

<a id="5"></a> <br>
**Box Plot**

In [None]:
# Box plot of Attack, with one box per value of the Legendary column.
pokemonData.boxplot(column='Attack',by='Legendary')
plt.show()

<a id="6"></a> <br>
**Melt Data**

In [None]:
# Small sample (head() defaults to the first five rows) used by the melt/pivot cells.
pokemonDataNew = pokemonData.head()
pokemonDataNew

In [None]:
# Reshape the sample from wide to long: Name stays as the identifier, while
# Speed and Defense are unpivoted into "variable"/"value" pairs.
pokemonDataMelted = pokemonDataNew.melt(id_vars='Name', value_vars=['Speed', 'Defense'])
pokemonDataMelted

<a id="7"></a> <br>
**Pivot Data**
* Reverse Melted Data

In [None]:
# Undo the melt: one row per Name, one column per melted variable.
pokemonDataMelted.pivot(index='Name',columns='variable',values='value')

<a id="8"></a> <br>
**Concatenating Data**

In [None]:
# Stack the first and last rows of the frame vertically; ignore_index
# renumbers the combined rows from 0.
headData = pokemonData.head()
tailData = pokemonData.tail()
conc_data = pd.concat((headData, tailData), ignore_index=True)
conc_data

In [None]:
# Place the leading Attack and Speed values side by side as two columns.
attack = pokemonData.Attack.head()
speed = pokemonData.Speed.head()
conc_data_axis1 = pd.concat([attack, speed], axis="columns")
conc_data_axis1

<a id="9"></a> <br>
**Change Data Types**

In [None]:
# Convert "Type 1" to the categorical dtype, then list the dtype of every column.
pokemonData["Type 1"] = pokemonData["Type 1"].astype('category')
pokemonData.dtypes

<a id="10"></a> <br>
**Missing Data**

In [None]:
# Re-inspect non-null counts per column to spot columns with missing values.
pokemonData.info()

In [None]:
# Frequency of each "Type 2" value, including the missing entries.
pokemonData["Type 2"].value_counts(dropna=False)

In [None]:
#drop non values
pokemonData["Type 2"].dropna(inplace=True)
#pokemonData["Type 2"].fillna("empty",inplace=True)

In [None]:
assert pokemonData["Type 2"].notnull().all() # is true all datas were  fill with values

In [None]:
# Three-column subset reused by the plotting cells, drawn here as line plots on shared axes.
data1 = pokemonData[["Attack", "Defense", "Generation"]]
data1.plot()

In [None]:
# Same data, with each column drawn in its own subplot.
data1.plot(subplots= True)
plt.show()

In [None]:
# Defense histogram as a normalised density (top) and as its cumulative
# version (bottom); the figure is also written to graph.png.
fig, axes = plt.subplots(nrows=2, ncols=1)
# `density=True` replaces the former `normed=True` keyword, which current
# matplotlib releases no longer accept.
data1.plot(kind="hist", y="Defense", bins=45, range=(0, 250), density=True, ax=axes[0])
data1.plot(kind="hist", y="Defense", bins=45, range=(0, 250), density=True, ax=axes[1], cumulative=True)
plt.savefig("graph.png")
plt.show()

<a id="11"></a> <br>
**PANDAS TIME SERIES**

<a id="12"></a> <br>
**Visual Exploratory Data Analysis**

In [None]:
import warnings
# Silences every warning for the rest of the session (kept as in the original cell).
warnings.filterwarnings("ignore")

# Build a tiny time-indexed frame for the resampling examples.
# Take an explicit copy: adding a column to a head() slice would otherwise be
# an assignment on a derived frame (the chained-assignment warning case).
data2 = pokemonData.head().copy()
date_list = ["1992-01-10", "1992-02-10", "1992-03-10", "1993-03-15", "1993-03-16"]
dateTimeObject = pd.to_datetime(date_list)
data2["date"] = dateTimeObject
# Use the parsed dates as the index so the frame can be resampled.
data2 = data2.set_index("date")
data2

In [None]:
# Mean per calendar year ("A" = year-end frequency).
# Only numeric/boolean columns are averaged: the frame also carries text
# columns, on which recent pandas releases raise instead of dropping them.
# NOTE(review): newer pandas releases deprecate the "A" alias in favour of
# "YE" -- confirm against the installed version.
data2.select_dtypes(include=[np.number, "bool"]).resample("A").mean()

In [None]:
# Mean per calendar month ("M" = month-end frequency); months without rows yield NaN.
# Only numeric/boolean columns are averaged: the frame also carries text
# columns, on which recent pandas releases raise instead of dropping them.
# NOTE(review): newer pandas releases deprecate the "M" alias in favour of
# "ME" -- confirm against the installed version.
data2.select_dtypes(include=[np.number, "bool"]).resample("M").mean()

In [None]:
# Yearly bins: keep the first row of each bin, then linearly interpolate any gaps.
data2.resample("A").first().interpolate("linear")

In [None]:
# Monthly bins: keep the first row of each bin, then linearly interpolate the
# months that have no data.
data2.resample("M").first().interpolate("linear")

<a id="13"></a> <br>
**Manipulating Data Frames with Pandas**

<a id="14"></a> <br>
**Indexing Data Frame**

In [None]:
# New frame indexed by the "#" column (pokemonData itself is not modified).
datax = pokemonData.set_index("#")
datax.head()

In [None]:
# Attack at index label 1 (the index now holds the "#" values).
datax.Attack[1]

In [None]:
# Same lookup through .loc: row label 1, restricted to the Attack column.
datax.loc[1,["Attack"]]

In [None]:
# Select several columns at once by passing a list of labels.
datax[["Attack","Speed"]]

<a id="15"></a> <br>
**Slicing Data Frame**

In [None]:
# Single brackets select one column as a Series; double brackets
# (a list of labels) keep the result a DataFrame.
print(type(datax["Attack"])) # Series
print(type(datax[["Defense"]])) # DataFrame

In [None]:
# Label-based slice: rows labelled 1 through 10 (both ends included), two columns.
pokemonData.loc[1:10,["Attack","Speed"]]

In [None]:
# Same rows in reverse order, from label 10 down to label 1.
pokemonData.loc[10:1:-1,["Attack","Speed"]]

In [None]:
# Rows labelled 1 through 10, and every column from "Speed" to the last one.
pokemonData.loc[1:10,"Speed":]

<a id="16"></a> <br>
**FILTERING DATA FRAMES**

In [None]:
# Boolean mask (a Series, despite the variable name) marking rows with HP above 200,
# then the rows it selects.
booleanDataFrame = pokemonData.HP > 200
pokemonData[booleanDataFrame]

In [None]:
# Combine two boolean masks with element-wise AND: HP above 150 and Speed above 25.
f1 = pokemonData.HP > 150
f2 = pokemonData.Speed > 25
pokemonData[f1 & f2]

In [None]:
# Attack values of the rows whose Speed is below 15 (filter one column by another).
pokemonData.Attack[pokemonData.Speed<15]

In [None]:
# Apply a function to every HP value: here each value is doubled. The result is
# a new Series; the frame is not modified.
pokemonData.HP.apply(lambda x : x*2)

In [None]:
# Derive a new column as the element-wise sum of Attack and Defense.
pokemonData["total_power"] = pokemonData.Attack + pokemonData.Defense
pokemonData.head()

<a id="17"></a> <br>
**INDEX OBJECTS AND LABELED DATA**

In [None]:
# Work on a copy so the relabelling below does not touch pokemonData.
dataCopied = pokemonData.copy()
# Relabel the rows starting at 100. The range is derived from the actual row
# count, so the assignment cannot fail on a length mismatch if the number of
# rows differs from the 800 that a hard-coded range(100, 900) assumes.
dataCopied.index = range(100, 100 + len(dataCopied))
dataCopied.head()

In [None]:
# Hierarchical (two-level) index built from the two type columns.
# set_index returns a new frame, so no preliminary copy is needed (the
# previous copy was overwritten immediately and never used).
dataCopied2 = pokemonData.set_index(["Type 1", "Type 2"])
dataCopied2.head(100)