**Content:**
1. [Introduction to Python:](#1)
    1. [Matplotlib](#2)
    1. [Dictionaries ](#3)
    1. [Pandas](#4)
    1. [Logic, control flow and filtering](#5)
    1. [Loop data structures](#6)
1. [Python Data Science Toolbox:](#7)
    1. [User defined function](#8)
    1. [Scope](#9)
    1. [Nested function](#10)
    1. [Default and flexible arguments](#11)
    1. [Lambda function](#12)
    1. [Anonymous function](#13)
    1. [Iterators](#14)
    1. [List comprehension](#15)
1. [Cleaning Data](#16)
    1. [Diagnose data for cleaning](#17)
    1. [Exploratory data analysis](#18)
    1. [Visual exploratory data analysis](#19)
    1. [Tidy data](#20)
    1. [Pivoting data](#21)
    1. [Concatenating data](#22)
    1. [Data types](#23)
    1. [Missing data and testing with assert](#24)
1. [Pandas Foundation](#25)
    1. [Review of pandas](#26)
    1. [Building data frames from scratch](#27)
    1. [Visual exploratory data analysis](#28)
    1. [Statistical exploratory data analysis](#29)
    1. [Indexing pandas time series](#30)
    1. [Resampling pandas time series](#31)
1. [Manipulating Data Frames with Pandas](#32)
    1. [Indexing data frames](#33)
    1. [Slicing data frames](#34)
    1. [Filtering data frames](#35)
    1. [Transforming data frames](#36)
    1. [Index objects and labeled data](#37)
    1. [Hierarchical indexing](#38)
    1. [Pivoting data frames](#39)
    1. [Stacking and unstacking data frames](#40)
    1. [Melting data frames](#41)
    1. [Categoricals and groupby](#42)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns  # visualization tool

# Input data files are available in the "../input/" directory.
# Running this cell (Run button or Shift+Enter) prints what that directory contains.

from subprocess import check_output
raw_listing = check_output(["ls", "../input"])
print(raw_listing.decode("utf8"))

import os
# Walk the Kaggle input root and print the full path of every file found.
for folder, _, file_names in os.walk("/kaggle/input"):
    for file_name in file_names:
        print(os.path.join(folder, file_name))
# Any results you write to the current directory are saved as output.

In [None]:
# Load the Pokemon CSV and discard its "#" column in one chained expression.
data = pd.read_csv("/kaggle/input/pokemon.csv").drop(columns="#")

In [None]:
data.info()

In [None]:
data.head()

In [None]:
# Correlation map
# Select the numeric/bool columns explicitly before calling corr(): since
# pandas 2.0, DataFrame.corr() raises on text columns instead of silently
# dropping them.
f, ax = plt.subplots(figsize=(18, 18))
numeric_data = data.select_dtypes(include=["number", "bool"])
sns.heatmap(numeric_data.corr(), annot=True, linewidths=1, fmt='.1f', ax=ax)
plt.show()

In [None]:
# Correlation matrix of the numeric/bool columns only; text columns make
# DataFrame.corr() raise on pandas >= 2.0.
data.select_dtypes(include=["number", "bool"]).corr()

In [None]:
# the features (columns) that the dataset has
data.columns

<a id="1"></a><br>
# 1. INTRODUCTION TO PYTHON

<a id="2"></a><br>
## MATPLOTLIB

* Line plot x ekseni time(zaman) ise kullanılır.
* Scatter plot iki değişken arasındaki değişikliğe (korelasyonuna) bakacağımız zaman kullanırız
* Histogram data'nın dağılımına(sıklığına) bakacaksak kullanabiliriz.

In [None]:
# Line plot
# Options shared by both curves are collected once and unpacked into each call.
line_kwargs = dict(kind="line", linewidth=1, grid=True, figsize=(10, 6))
data.Speed.plot(color="blue", label="Speed", linestyle=":", **line_kwargs)
data.Defense.plot(color="red", label="Defense", linestyle="-.", **line_kwargs)
plt.legend(loc="upper right")
plt.xlabel("x axis")
plt.ylabel("y axis")
plt.title("Line Plot")
plt.show()

In [None]:
# Scatter plot: Attack on the x-axis against Defense on the y-axis.
data.plot(kind="scatter", x="Attack", y="Defense", alpha=0.5, color="red", figsize=(10, 6))
plt.xlabel("Attack")
plt.ylabel("Defense")
plt.title("Attack Defense Scatter Plot")
# Render explicitly, as the line-plot cell does, so the cell does not end by
# echoing the Text object returned by plt.title().
plt.show()

In [None]:
# Histogram: distribution of Speed over 50 bins.
data.Speed.plot(kind="hist", bins=50, figsize=(15, 8))
plt.xlabel("Speed")
plt.ylabel("Speed Frekansı")
# Render explicitly, as the line-plot cell does, so the cell does not end by
# echoing the Text object returned by plt.ylabel().
plt.show()

In [None]:
# clf() clears the current figure so that the next plot starts from a blank canvas
data.Speed.plot(kind="hist",bins=50,figsize=(15,8))
plt.clf() # clears the plot that was just drawn

<a id="3"></a><br>
# Dictionary

In [None]:
# Build a small country -> city mapping, then print its key view and value view.
dic = dict(spain="madrid", usa="vegas")
for view in (dic.keys(), dic.values()):
    print(view)

In [None]:
# Overwrite an existing entry.
dic.update(spain="barcelona")
print(dic)
# Insert a new entry.
dic.update(france="paris")
print(dic)
# Membership test on the keys.
has_france = "france" in dic
print(has_france)
# Remove every entry; the dictionary object itself remains.
dic.clear()
print(dic)

<a id="4"></a><br>
## PANDAS

In [None]:
# Load the Pokemon CSV into df and discard its "#" column in one chained expression.
df = pd.read_csv("/kaggle/input/pokemon.csv").drop(columns="#")
df.head()

In [None]:
# Single brackets select one column as a Series; double brackets keep a
# one-column DataFrame. Both selections use df so the comparison is made on
# the same frame (the original mixed `data` and `df`).
series = df["Defense"]
print(type(series))
data_frame = df[["Defense"]]
print(type(data_frame))

In [None]:
# Boolean mask: True for the rows whose Defense exceeds 200.
x = df["Defense"].gt(200)
df.loc[x]

In [None]:
# Rows with Defense above 200 AND Attack above 100; both conditions must hold.
df[(df["Defense"] > 200) & (df["Attack"] > 100)]

In [None]:
# Rows with Defense above 200 OR Attack above 180; one condition is enough.
df[(df["Defense"] > 200) | (df["Attack"] > 180)]

In [None]:
lis = [1, 2, 3, 4, 5]
# enumerate yields (position, element) pairs.
for position, element in enumerate(lis):
    print(position, " : ", element)

In [None]:
dictionary = dict(spain="madrid", france="paris")
# items() yields (key, value) pairs.
for country_name, capital_name in dictionary.items():
    print(country_name, ":", capital_name)

In [None]:
# iterrows yields (row label, row as a Series) pairs of the one-column frame.
attack_frame = data[["Attack"]]
for row_label, row in attack_frame.iterrows():
    print(row_label, ":", row)

<a id="7"></a><br>
## Python Data Science Toolbox

In [None]:
# USER DEFINED FUNCTION
def tuble_ex():
    """Return the fixed tuple (1, 2, 3)."""
    return 1, 2, 3
# Unpack the returned tuple into three names.
a, b, c = tuble_ex()
print(a, b, c)

In [None]:
lis = [1, 2, 3]
# Sequence unpacking works on lists as well as tuples.
[a, b, c] = lis
print(a, b, c)

In [None]:
# SCOPE
x = 5
def f():
    """Return twice the module-level x."""
    # f has no local x, so the name lookup falls through to the global x.
    return 2 * x
print(f())

In [None]:
x = 5
def f():
    """Return twice a local x that shadows the global one."""
    x = 3  # local x: takes precedence over the global x inside f
    doubled = 2 * x
    return doubled
print(f())

In [None]:
# List every name defined in the builtins module (the built-in scope).
import builtins
dir(builtins)

In [None]:
# NESTED Functions
def square():
    """Return the square of the value produced by the inner helper."""
    def add():
        # Inner function: only visible inside square().
        first, second = 2, 3
        return first + second
    total = add()
    return total ** 2
print(square())

In [None]:
# Default and Flexible Arguments
def f(a, b=1, c=2):
    """Return a + b + c; b and c fall back to 1 and 2 when omitted."""
    return a + b + c
# Only `a` supplied: the defaults are used for b and c.
print(f(5))
# All three supplied: the defaults are overridden.
print(f(5, 4, 3))

In [None]:
def f(*args):
    """Print every positional argument on its own line."""
    for positional in args:
        print(positional)
f(1, 2, 3, 4)


def ff(**kwargs):
    """Print every keyword argument as `name : value`."""
    for name in kwargs:
        print(name, ":", kwargs[name])
ff(country="france", capital="paris", population=15005587)

In [None]:
# Lambda Functions
# A lambda is a single-expression function; here it is bound to the name `square`.
square = lambda x: x**2
print(square(4))

In [None]:
# Anonymous Functions
liste =[1,2,3,4,5]
# map applies the unnamed lambda to each element lazily and returns an iterator.
y = map(lambda x: x**2,liste)
# list() consumes the iterator to materialise the results.
print(list(y))

In [None]:
# ITERATORS
name = "ronaldo"
it = iter(name)  # iterator over the characters of the string
print(next(it))  # first character
print(next(it))  # second character
print(*it)  # unpacks and prints all remaining characters

In [None]:
# The zip() function
list1 = [1,2,3,4,5]
list2 = [6,7,8,9]
# zip pairs elements position by position and stops at the shorter input,
# so the last element of list1 has no partner and is left out.
z = zip(list1,list2)
print(z)  # prints the zip object itself, not its contents
z_list = list(z)  # materialise the pairs as a list of tuples
print(z_list)

In [None]:
# Unzip: unpacking the list of pairs into zip() regroups them by position.
# z_list is expected to be the list of pairs built in the previous cell.
un_zip = zip(*z_list)
un_list1,un_list2 = list(un_zip)
print(un_list1)
print(un_list2)
print(type(un_list1))  # zip yields tuples, so this is a tuple rather than a list


In [None]:
# LIST COMPREHENSION
num1 = [1,2,3]
# Build a new list holding each element of num1 incremented by one.
num2 = [i+1 for i in num1]
print(num2)

In [None]:
num1 = [5, 10, 15]
num2 = []
for value in num1:
    # Choose the transformed value first, then append once.
    if value == 10:
        transformed = value ** 2
    elif value < 7:
        transformed = value - 5
    else:
        transformed = value + 5
    num2.append(transformed)
print(num2)

In [None]:
num1 = [5, 10, 15]
# The same branching as an if/elif/else loop, written as one comprehension:
# square the value 10, subtract 5 from values below 7, add 5 to the rest.
num2 = [
    (n ** 2 if n == 10 else (n - 5 if n < 7 else n + 5))
    for n in num1
]
print(num2)

In [None]:
# Pokemon data
# The threshold is the mean Speed, computed with the vectorised Series.mean()
# rather than a Python-level sum()/len() over the column.
threshold = data.Speed.mean()
print("threshol: ",threshold)
# Label each row "high" when its Speed exceeds the threshold, otherwise "low";
# np.where evaluates the whole column at once.
data["speed_level"] = np.where(data.Speed > threshold, "high", "low")
# Show the new label next to Speed for the rows labelled 0 through 10.
data.loc[:10,["speed_level","Speed"]]

<a id="16"></a><br>
# Cleaning Data

We need to diagnose and clean data before exploring.
Unclean data:
   * Column name inconsistency like upper-lower case letter or space between words(Büyük-küçük harf veya kelimeler arasındaki boşluk gibi sütun adı tutarsızlığı)
   * missing data (Eksik veriler)
   * different language (farklı dil)

In [None]:
data.head()

In [None]:
data.shape

In [None]:
data.info()

In [None]:
data["Type 1"].value_counts()

In [None]:
# Exploratory Data Analysis (EDA)

In [None]:
data.describe().T

In [None]:
# Visual Exploratory Data Analysis

In [None]:
data.boxplot(column="Attack", by="Legendary",figsize=(12,6))
plt.show()

In [None]:
# TIDY DATA


In [None]:
data_new = data.head()
data_new

In [None]:
# Reshape wide -> long: Name stays as the identifier, while Attack and Defense
# are folded into a "variable"/"value" column pair.
melted = data_new.melt(id_vars="Name", value_vars=["Attack", "Defense"])
melted

In [None]:
# PIVOTING DATA

In [None]:
melted.pivot(index="Name", columns="variable", values="value")

In [None]:
# Concatenating Data

In [None]:
# Stack the first and last rows of the dataset vertically.
data1 = data.head()
data2 = data.tail()
# ignore_index=True discards the original row labels and renumbers the combined rows from 0.
conc_data_row = pd.concat([data1,data2],axis=0, ignore_index=True)

In [None]:
# Place the first rows of Attack and Defense side by side (axis=1 -> columns).
data1, data2 = data["Attack"].head(), data["Defense"].head()
conc_data_col = pd.concat((data1, data2), axis=1)
conc_data_col

In [None]:
# Data Types
data.dtypes

In [None]:
# change the data type of features (columns) in the dataset
data["Type 1"] = data["Type 1"].astype("category")
data["Speed"] = data["Speed"].astype("float")

In [None]:
data.dtypes

In [None]:
# MISSING DATA (Eksik Veriler)

In [None]:
data.head(10) # Veri setindeki NaN degerler bos degerlerdir

In [None]:
data["Type 2"].value_counts(dropna=False)

In [None]:
ds = data.copy()
ds.head()

In [None]:
# Approach: drop the rows whose "Type 2" value is missing.
# ds["Type 2"].dropna(inplace=True) only modifies a temporary Series selected
# from the frame, so ds itself keeps its NaN rows on current pandas. Dropping
# on the frame with subset= and rebinding ds removes those rows.
ds = ds.dropna(subset=["Type 2"])

In [None]:
assert ds["Type 2"].notnull().all()

In [None]:
# Replace missing "Type 2" values with the placeholder "empty".
# Assigning the result back is the supported form: an in-place fillna on a
# column selection is deprecated and has no effect under copy-on-write.
data["Type 2"] = data["Type 2"].fillna("empty")

In [None]:
# Speed was cast with astype("float"), which gives float64. The np.float
# alias was removed in NumPy 1.24, so compare against np.float64.
assert data.Speed.dtypes == np.float64

<a id="25"></a><br>
## Pandas Foundation

In [None]:
country = ["Spain", "France"]
population = ["11", "12"]
list_label = ["country", "population"]
list_col = [country, population]
# Pair every column label with its list of values.
zipped = [(label, column) for label, column in zip(list_label, list_col)]
print(zipped)

In [None]:
# Turn the (label, values) pairs into a mapping, then into a DataFrame whose
# columns are the labels.
data_dict = {label: values for label, values in zipped}
dc = pd.DataFrame(data=data_dict)
dc

In [None]:
dc["capital"] = ["madrid","paris"] # add a new feature (column)
dc

In [None]:
dc["income"] = 0
dc

In [None]:
# VISUAL EXPLORATORY DATA ANALYSIS
# Keep only the three stat columns that the following plots use.
plot_columns = ["Attack", "Defense", "Speed"]
data1 = data[plot_columns]
data1.head()

In [None]:
data1.plot(figsize=(12,8))

In [None]:
#subplots
data1.plot(subplots= True,figsize=(12,6))
plt.show()

In [None]:
# range sets the x-axis interval that is binned. density=True normalises the
# histogram so that the bar areas sum to 1 (a probability density).
# The old `normed` keyword was removed from matplotlib's hist, so `density`
# is used instead.
data1.plot(kind="hist", y="Defense", bins=50, range=(0, 250), density=True)
plt.show()

In [None]:
# Two stacked axes: the density histogram on top, its cumulative version below.
# `density=True` replaces the `normed` keyword that matplotlib removed.
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
data1.plot(kind="hist", y="Defense", bins=50, range=(0, 250), density=True, ax=ax[0])
# cumulative=True accumulates the bins from left to right, so the last bar
# holds the total of all the bins before it.
data1.plot(kind="hist", y="Defense", bins=50, range=(0, 250), density=True, ax=ax[1], cumulative=True)
plt.savefig("graph.png")
plt.show()

### STATISTICAL EXPLORATORY DATA ANALYSIS

* count
* min 
* max
* std
* mean
* %25
* %50
* %75

In [None]:
data.describe().T

In [None]:
# INDEXING PANDAS TIME SERIES

In [None]:
time_list = ["1992-03-08","1992-04-12"]
# The dates start out as plain strings.
print(type(time_list[1]))

# pd.to_datetime parses the list of strings; the type of the result is printed below.
datetime_object = pd.to_datetime(time_list)
print(type(datetime_object))

In [None]:
# Work on an explicit copy: adding a column to the result of data.head()
# triggers SettingWithCopyWarning, because pandas cannot tell whether the
# assignment is meant to reach `data`.
data2 = data.head().copy()
date_list = ["1992-01-10","1992-02-10","1992-03-10","1993-04-10","1993-05-10"]
datetime_object = pd.to_datetime(date_list)
data2["date"] = datetime_object
# Use the dates as the index so rows can be selected and resampled by time.
data2 = data2.set_index("date")
data2

In [None]:
print(data2.loc["1992-01-10"])

In [None]:
print(data2.loc["1992-01-10":"1992-03-10"])

In [None]:
# RESAMPLING PANDAS TIME SERIES

In [None]:
# "A" -> yearly frequency: returns the mean of each feature per year
# (the original note reports a 1992 Speed mean of 61.666...).
# Only the numeric/bool columns are averaged: pandas >= 2.0 raises when
# asked for the mean of text columns instead of silently dropping them.
# NOTE(review): pandas >= 2.2 deprecates the "A" alias in favour of "YE";
# switch once older pandas versions no longer need to be supported.
data2.select_dtypes(include=["number", "bool"]).resample("A").mean()

In [None]:
# "M" -> monthly frequency: returns the mean of each feature per month.
# Only the numeric/bool columns are averaged: pandas >= 2.0 raises when
# asked for the mean of text columns instead of silently dropping them.
# NOTE(review): pandas >= 2.2 deprecates the "M" alias in favour of "ME";
# switch once older pandas versions no longer need to be supported.
data2.select_dtypes(include=["number", "bool"]).resample("M").mean()

In [None]:
# Fills the missing values as follows: in the Defense column, for example, the
# months between the values 83 and 123 were empty; interpolation fills them by
# increasing linearly in equal steps from 83 up to 123.
# NOTE(review): the frame presumably still holds text columns here; recent
# pandas versions warn about interpolating object-dtype columns — confirm
# against the pandas version in use.
data2.resample("M").first().interpolate("linear")

In [None]:
# Monthly means with the empty months filled by linear interpolation.
# Only the numeric/bool columns are averaged: pandas >= 2.0 raises when
# asked for the mean of text columns instead of silently dropping them.
data2.select_dtypes(include=["number", "bool"]).resample("M").mean().interpolate("linear")

<a id="32"></a><br>
## Manipulating Data Frames with Pandas

In [None]:
# Reload the Pokemon CSV from scratch and discard its "#" column in one chained expression.
data = pd.read_csv("/kaggle/input/pokemon.csv").drop(columns="#")
data.head()

In [None]:
data["Defense"][0]

In [None]:
data.Defense[0]

In [None]:
data.loc[0,["Defense"]]

In [None]:
data[["Defense","Attack"]].head()

In [None]:
# Slicing DataFrame
print(type(data["Defense"]))
print(type(data[["Defense"]]))

In [None]:
data.loc[1:10,"Attack":"Defense"]

In [None]:
data.iloc[1:10,4:6]

In [None]:
# FILTERING DATAFRAMES
boolean = data.HP > 200
data[boolean]

In [None]:
# Two boolean masks combined with & keep the rows that satisfy both conditions.
first_filter = data.HP.gt(150)
second_filter = data.Speed.gt(35)
data.loc[first_filter & second_filter]

In [None]:
data[data.Speed < 15]

In [None]:
data.HP[data.Speed < 15]

In [None]:
# Transforming Data
def div_(n):
    """Return half of n, using true division."""
    half = n / 2
    return half
data.HP.apply(div_)[0:10]

In [None]:
data.HP.apply(lambda n: n/2)[0:10]

In [None]:
# New feature: the element-wise sum of Attack and Defense.
data["total_power"] = data.Attack.add(data.Defense)
data.head()

In [None]:
# INDEX OBJECTS AND LABELED DATA

In [None]:
data = data.set_index(["Type 1","Type 2"])

In [None]:
data.head(100)

In [None]:
# PIVOTING DATAFRAMES
# Small hand-made table: one row per (treatment, gender) combination.
dic = dict(
    treatment=["A", "A", "B", "B"],
    gender=["F", "M", "F", "M"],
    response=[10, 45, 5, 9],
    age=[15, 4, 72, 65],
)
df = pd.DataFrame(data=dic)
df

In [None]:
df.pivot(index="treatment",columns="gender",values="response")

In [None]:
# Stacking and UnStacking DataFrame
df1 = df.set_index(["treatment","gender"])
df1

In [None]:
df1.unstack(level=1)

In [None]:
 df1.unstack(level=0)

In [None]:
df_ = df1.swaplevel(0,1)
df_

In [None]:
# MELTING

In [None]:
pd.melt(df,id_vars="treatment", value_vars=["age","response"])

In [None]:
# Groupby
# Mean of every numeric column per treatment group. numeric_only=True is
# required because the text column "gender" cannot be averaged: pandas >= 2.0
# raises for it instead of silently dropping it.
df.groupby("treatment").mean(numeric_only=True)

In [None]:
df.groupby("treatment").age.mean()