# Python Pandas

In [None]:
'''Pandas is an open-source, BSD-licensed Python library providing high-performance, 
easy-to-use data structures and data analysis tools for the Python programming language. 
Python with Pandas is used in a wide range of fields including academic and commercial domains including 
finance, economics, Statistics, analytics, etc. In this tutorial, we will learn the various features of 
Python Pandas and how to use them in practice.
'''

In [None]:
'''This tutorial has been prepared for those who seek to learn the basics and various functions of Pandas. 
It will be specifically useful for people working with data cleansing and analysis.'''

# Pandas Cheat Sheet

[Pandas Comprehensive Document](https://github.com/m-peker/Python-Pandas-Basics/blob/master/Pandas.pdf)

# Basic Methods

In [37]:
import pandas as pd

dictionary = {"NAME":["ali","veli","kenan","hilal","ayse","evren"],
              "AGE":[15,16,17,33,45,66],
              "MAAS": [100,150,240,350,110,220]} 

dataFrame1 = pd.DataFrame(dictionary)
print(dataFrame1)
print("----------------------------")
head = dataFrame1.head(3) # To see the first n rows
print(head)
print("----------------------------")
tail = dataFrame1.tail(2) # To see the last n rows
print(tail)

    NAME  AGE  MAAS
0    ali   15   100
1   veli   16   150
2  kenan   17   240
3  hilal   33   350
4   ayse   45   110
5  evren   66   220
----------------------------
    NAME  AGE  MAAS
0    ali   15   100
1   veli   16   150
2  kenan   17   240
----------------------------
    NAME  AGE  MAAS
4   ayse   45   110
5  evren   66   220


In [38]:
print(dataFrame1.columns) # The column labels of the DataFrame.
print("----------------------------")

print(dataFrame1.info()) # Getting info about your data
print("----------------------------")

print(dataFrame1.dtypes) # print(dataFrame1.dtypes)
print("----------------------------")

print(dataFrame1.describe())  # numeric feature = columns (age,maas)

Index(['NAME', 'AGE', 'MAAS'], dtype='object')
----------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
NAME    6 non-null object
AGE     6 non-null int64
MAAS    6 non-null int64
dtypes: int64(2), object(1)
memory usage: 224.0+ bytes
None
----------------------------
NAME    object
AGE      int64
MAAS     int64
dtype: object
----------------------------
             AGE        MAAS
count   6.000000    6.000000
mean   32.000000  195.000000
std    20.474374   94.815611
min    15.000000  100.000000
25%    16.250000  120.000000
50%    25.000000  185.000000
75%    42.000000  235.000000
max    66.000000  350.000000


# Indexing and Slicing Data Frames

In [39]:
print(dataFrame1["AGE"])
print("----------------------------")

print(dataFrame1.AGE)
print("----------------------------")

dataFrame1["yeni_feature"] = [-1,-2,-3,-4,-5,-6]

print(dataFrame1.loc[:, "AGE"])
print("----------------------------")

print(dataFrame1.loc[:3, "AGE"])
print("----------------------------")

print(dataFrame1.loc[:3, ["AGE","NAME"]])
print("----------------------------")

print(dataFrame1.loc[::-1,:])
print("----------------------------")

print(dataFrame1.loc[:,:"NAME"])
print("----------------------------")

print(dataFrame1.loc[:,"NAME"])
print("----------------------------")

print(dataFrame1.iloc[:,2])

0    15
1    16
2    17
3    33
4    45
5    66
Name: AGE, dtype: int64
----------------------------
0    15
1    16
2    17
3    33
4    45
5    66
Name: AGE, dtype: int64
----------------------------
0    15
1    16
2    17
3    33
4    45
5    66
Name: AGE, dtype: int64
----------------------------
0    15
1    16
2    17
3    33
Name: AGE, dtype: int64
----------------------------
   AGE   NAME
0   15    ali
1   16   veli
2   17  kenan
3   33  hilal
----------------------------
    NAME  AGE  MAAS  yeni_feature
5  evren   66   220            -6
4   ayse   45   110            -5
3  hilal   33   350            -4
2  kenan   17   240            -3
1   veli   16   150            -2
0    ali   15   100            -1
----------------------------
    NAME
0    ali
1   veli
2  kenan
3  hilal
4   ayse
5  evren
----------------------------
0      ali
1     veli
2    kenan
3    hilal
4     ayse
5    evren
Name: NAME, dtype: object
----------------------------
0    100
1    150
2    240
3    3

# Filtering Pandas Data Frame

In [40]:
filtre1 = dataFrame1.MAAS > 200

filtrelenmis_data = dataFrame1[filtre1]

print(filtrelenmis_data)
print("----------------------------")

filtre2 = dataFrame1.AGE <20

print(filtre2)
print("----------------------------")

dataFrame1[filtre1 & filtre2]

    NAME  AGE  MAAS  yeni_feature
2  kenan   17   240            -3
3  hilal   33   350            -4
5  evren   66   220            -6
----------------------------
0     True
1     True
2     True
3    False
4    False
5    False
Name: AGE, dtype: bool
----------------------------


Unnamed: 0,NAME,AGE,MAAS,yeni_feature
2,kenan,17,240,-3


# List Comprehension

In [48]:
# import numpy as np

dictionary = {"NAME":["ali","veli","kenan","hilal","ayse","evren"],
              "AGE":[15,16,17,33,45,66],
              "MAAS": [100,150,240,350,110,220]} 

dataFrame1 = pd.DataFrame(dictionary)


ortalama_maas = dataFrame1.MAAS.mean()

# ortalama_maas_np = np.mean(dataFrame1.MAAS)


dataFrame1["maas_seviyesi"] = ["dusuk" if ortalama_maas > each else "yuksek" for each in dataFrame1.MAAS]

#for each in dataFrame1.MAAS:
#    if(ortalama_maas > each):
#        print("dusuk")
#    else:
#        print("yukse")
        
print(dataFrame1)
print("----------------------------")

print(dataFrame1.columns)
print("----------------------------")

dataFrame1.columns = [ each.lower() for each in dataFrame1.columns] 
print(dataFrame1.columns)
print("----------------------------")

dataFrame1.columns = [each.split()[0]+"_"+each.split()[1] if(len(each.split())>1) else each for each in dataFrame1.columns]
print(dataFrame1.columns)

    NAME  AGE  MAAS maas_seviyesi
0    ali   15   100         dusuk
1   veli   16   150         dusuk
2  kenan   17   240        yuksek
3  hilal   33   350        yuksek
4   ayse   45   110         dusuk
5  evren   66   220        yuksek
----------------------------
Index(['NAME', 'AGE', 'MAAS', 'maas_seviyesi'], dtype='object')
----------------------------
Index(['name', 'age', 'maas', 'maas_seviyesi'], dtype='object')
----------------------------
Index(['name', 'age', 'maas', 'maas_seviyesi'], dtype='object')


# Concatenating Data

In [54]:
# dataFrame1.drop(["yeni_feature"],axis=1,inplace = True)

# dataFrame1 = dataFrame1.drop(["yeni_feature"],axis=1)

data1 = dataFrame1.head()
data2 = dataFrame1.tail()

# vertical
data_concat = pd.concat([data1,data2],axis=0)
print(data_concat)
print("----------------------------")

# horizontal

maas = dataFrame1.maas
age = dataFrame1.age

data_h_concat = pd.concat([maas,age],axis=1)

print(data_h_concat)

    name  age  maas maas_seviyesi
0    ali   15   100         dusuk
1   veli   16   150         dusuk
2  kenan   17   240        yuksek
3  hilal   33   350        yuksek
4   ayse   45   110         dusuk
1   veli   16   150         dusuk
2  kenan   17   240        yuksek
3  hilal   33   350        yuksek
4   ayse   45   110         dusuk
5  evren   66   220        yuksek
----------------------------
   maas  age
0   100   15
1   150   16
2   240   17
3   350   33
4   110   45
5   220   66


# Transforming Data

In [58]:
dataFrame1["list_comp"] = [ each*2 for each in dataFrame1.age]
print(dataFrame1)
print("----------------------------")

# apply()
def multiply(age):
    return age*2
    
dataFrame1["apply_metodu"] = dataFrame1.age.apply(multiply)


    name  age  maas maas_seviyesi  list_comp  apply_metodu
0    ali   15   100         dusuk         30            30
1   veli   16   150         dusuk         32            32
2  kenan   17   240        yuksek         34            34
3  hilal   33   350        yuksek         66            66
4   ayse   45   110         dusuk         90            90
5  evren   66   220        yuksek        132           132
----------------------------
