# Pandas

In [1]:
import pandas as pd

###### Veri Yapısı

Seriler

In [2]:
# Build a four-element integer Series whose index labels are the letters a-d.
s = pd.Series(data=[3, -5, 7, 4], index=list('abcd'))

In [3]:
# A bare name as the last expression of a cell displays the object.
s

a    3
b   -5
c    7
d    4
dtype: int64

Dataframe

In [4]:
# Column-oriented sample data: each key is a column name, each list holds that
# column's values (one entry per country).
data = dict(
    Country=['Belgium', 'India', 'Brazil'],
    Capital=['Brussels', 'New Delhi', 'Brasilia'],
    Population=[11190846, 1303171835, 207847528],
)

In [6]:
# Build the DataFrame from the dict of lists; `columns` states the column order explicitly.
df = pd.DataFrame(data, columns=['Country','Capital','Population'])

In [7]:
# Display the frame: three rows with the default integer index 0..2.
df

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
1,India,New Delhi,1303171835
2,Brazil,Brasilia,207847528


###### Dropping

In [8]:
# Drop the entries labelled 'a' and 'c'. The result is not assigned, so s itself is unchanged.
s.drop(['a','c'])

b   -5
d    4
dtype: int64

In [9]:
# axis=1 targets columns, so this returns the frame without the 'Country' column.
df.drop('Country',axis=1)

Unnamed: 0,Capital,Population
0,Brussels,11190846
1,New Delhi,1303171835
2,Brasilia,207847528


###### Yardım

In [10]:
# Print the built-in documentation for the label-based indexer Series.loc.
help(pd.Series.loc)

Help on property:

    Access a group of rows and columns by label(s) or a boolean array.
    
    ``.loc[]`` is primarily label based, but may also be used with a
    boolean array.
    
    Allowed inputs are:
    
    - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
      interpreted as a *label* of the index, and **never** as an
      integer position along the index).
    - A list or array of labels, e.g. ``['a', 'b', 'c']``.
    - A slice object with labels, e.g. ``'a':'f'``.
    
          start and the stop are included
    
    - A boolean array of the same length as the axis being sliced,
      e.g. ``[True, False, True]``.
    - An alignable boolean Series. The index of the key will be aligned before
      masking.
    - An alignable Index. The Index of the returned selection will be the input.
    - A ``callable`` function with one argument (the calling Series or
      DataFrame) and that returns valid output for indexing (one of the above)
    
    See more at 

###### Sort & Rank (Sıralama)

In [11]:
# Sort rows by index label. The index is already in order here, so the frame comes back unchanged.
df.sort_index()

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
1,India,New Delhi,1303171835
2,Brazil,Brasilia,207847528


In [12]:
# Sort rows alphabetically by the 'Country' column; each row keeps its original index label.
df.sort_values(by='Country')

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
2,Brazil,Brasilia,207847528
1,India,New Delhi,1303171835


In [13]:
# Rank the values within each column (1.0 = smallest); the text columns are ranked alphabetically.
df.rank()

Unnamed: 0,Country,Capital,Population
0,1.0,2.0,1.0
1,3.0,3.0,3.0
2,2.0,1.0,2.0


###### I/O

CSV Okuma ve Kaydetme

In [None]:
# Read only the first five rows of a CSV file and treat it as having no header
# row. 'file.csv' is a placeholder path (the extension was misspelled '.cvs').
pd.read_csv('file.csv', header=None, nrows=5)

In [16]:
# Write df to disk as a CSV file (the extension was misspelled '.cvs').
df.to_csv('myDataFrame.csv')

Excel Okuma ve Yazma

In [None]:
# Load an Excel workbook into a DataFrame; 'file.xlsx' is a placeholder path.
pd.read_excel('file.xlsx')

In [17]:
# Write df to an Excel workbook, placing it on the sheet named 'Sheet1'.
df.to_excel('myDataFrame.xlsx', sheet_name='Sheet1')

In [None]:
# Open the workbook once as an ExcelFile handle so its sheets can be read from it.
xlsx = pd.ExcelFile('file.xls')

In [None]:
# Parse one sheet of the already-opened workbook. The result gets its own name:
# binding it to `df` would replace the Country/Capital/Population example frame
# that every later cell in this notebook operates on.
df_excel = pd.read_excel(xlsx, sheet_name='sheet1')

SQL Query ve Database Tablo Okuma ve Yazma

In [18]:
from sqlalchemy import create_engine

In [None]:
# SQLAlchemy engine backed by an in-memory SQLite database.
engine = create_engine('sqlite:///:memory:')

In [None]:
# Run a SQL query through the engine and return the rows as a DataFrame.
# NOTE(review): no cell in this notebook creates 'my_table' in the in-memory
# database, so this looks like a placeholder example — confirm before running.
pd.read_sql("SELECT * FROM my_table;",engine)

In [None]:
# Load a whole database table by name (no SQL text needed).
pd.read_sql_table('my_table', engine)

In [None]:
# Run an explicit SQL query and return the result set as a DataFrame.
pd.read_sql_query("SELECT * FROM my_table;",engine)

In [None]:
# Write df into the database behind `engine` as a table named 'myDf'.
df.to_sql('myDf',engine)

###### Seçim İşlemleri

Getting

In [19]:
# Look up a single element by its index label.
s['b']

-5

In [20]:
# A slice inside [] selects rows: everything from the second row onwards.
df[1:]

Unnamed: 0,Country,Capital,Population
1,India,New Delhi,1303171835
2,Brazil,Brasilia,207847528


Selecting, Boolean Indexing & Setting

Pozisyona Göre

In [27]:
# Select by integer position. List selectors keep the result two-dimensional:
# a 1x1 DataFrame holding row 0, column 0.
df.iloc[[0],[0]]

Unnamed: 0,Country
0,Belgium


In [34]:
# Scalar access by integer position (row 0, column 0); returns the value itself.
df.iat[0,0]

'Belgium'

Etikete Göre

In [31]:
# Select by label: row label 0 and column 'Country'. The list selectors keep
# the result a 1x1 DataFrame.
df.loc[[0],['Country']]

Unnamed: 0,Country
0,Belgium


In [33]:
# Scalar access by label (row label 0, column 'Country'); returns the value itself.
df.at[0,'Country']

'Belgium'

Hem Etiket Hem de Pozisyona Göre

In [None]:
# The mixed label/position indexer `.ix` was removed in pandas 1.0. Use an
# explicit indexer instead: `.loc` for labels (used here) or `.iloc` for integer
# positions. With this frame's default 0..2 index both select the row for Brazil.
df.loc[2]

Boolean Indexing

In [39]:
# Boolean indexing: the comparison yields a True/False mask per row, and df[mask]
# keeps only the rows where it is True (population above 1.2 billion).
df[df['Population']>1200000000]

Unnamed: 0,Country,Capital,Population
1,India,New Delhi,1303171835


Setting

In [41]:
# Show s before the assignment below.
# NOTE(review): the recorded output has a == 6, but s was created with a == 3 and
# no visible cell changes it (execution counts also skip a number here). The
# output depends on a cell that is no longer in the notebook — re-run from a
# fresh kernel to refresh it.
s

a    6
b   -5
c    7
d    4
dtype: int64

In [42]:
# Overwrite the element labelled 'a' in place; this mutates s for all later cells.
s['a'] = 15

In [43]:
# Confirm the assignment: 'a' now holds 15.
s

a    15
b    -5
c     7
d     4
dtype: int64

###### Retrieving Series / DataFrame Information

Basit Bilgiler

In [44]:
# (number of rows, number of columns)
df.shape

(3, 3)

In [45]:
# The row labels; here the default RangeIndex 0..2.
df.index

RangeIndex(start=0, stop=3, step=1)

In [46]:
# The column labels.
df.columns

Index(['Country', 'Capital', 'Population'], dtype='object')

In [47]:
# Print a summary: index range, per-column non-null counts and dtypes, memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Country     3 non-null      object
 1   Capital     3 non-null      object
 2   Population  3 non-null      int64 
dtypes: int64(1), object(2)
memory usage: 200.0+ bytes


In [48]:
# Number of non-missing values in each column.
df.count()

Country       3
Capital       3
Population    3
dtype: int64

Özet

In [49]:
# Column-wise sum. For the text columns `+` means concatenation, hence the
# joined strings in the output.
df.sum()

Country              BelgiumIndiaBrazil
Capital       BrusselsNew DelhiBrasilia
Population                   1522210209
dtype: object

In [50]:
# Running (cumulative) sum down each column; text columns accumulate by concatenation.
df.cumsum()

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
1,BelgiumIndia,BrusselsNew Delhi,1314362681
2,BelgiumIndiaBrazil,BrusselsNew DelhiBrasilia,1522210209


In [51]:
# Smallest value per column; text columns compare alphabetically.
df.min()

Country        Belgium
Capital       Brasilia
Population    11190846
dtype: object

In [52]:
# Largest value per column; text columns compare alphabetically.
df.max()

Country            India
Capital        New Delhi
Population    1303171835
dtype: object

In [None]:
# idxmin/idxmax return the index label of the smallest/largest value.
# A cell displays only its last expression, so the original two-line form threw
# the idxmin result away; both results are returned together as a tuple instead.
# The lookup is restricted to the numeric column.
# NOTE(review): these reductions are not reliably supported for text (object)
# columns — confirm against the pandas version in use.
df['Population'].idxmin(), df['Population'].idxmax()

In [54]:
# Summary statistics (count, mean, std, min, quartiles, max); only the numeric
# column is included in the result.
df.describe()

Unnamed: 0,Population
count,3.0
mean,507403400.0
std,696135100.0
min,11190850.0
25%,109519200.0
50%,207847500.0
75%,755509700.0
max,1303172000.0


In [55]:
# Mean of the numeric columns. numeric_only=True skips the text columns
# explicitly; relying on pandas to drop them silently is a TypeError on pandas >= 2.0.
df.mean(numeric_only=True)

Population    507403403.0
dtype: float64

In [56]:
# Median of the numeric columns. numeric_only=True skips the text columns
# explicitly; relying on pandas to drop them silently is a TypeError on pandas >= 2.0.
df.median(numeric_only=True)

Population    207847528.0
dtype: float64

###### Fonksiyonların Uygulanması

In [57]:
def f(x):
    """Return ``x * 2``: doubles numbers and repeats sequences such as strings."""
    return x * 2

In [58]:
# Call f once per column (each column is passed as a Series), so x*2 is applied
# to the whole column at a time.
df.apply(f)

Unnamed: 0,Country,Capital,Population
0,BelgiumBelgium,BrusselsBrussels,22381692
1,IndiaIndia,New DelhiNew Delhi,2606343670
2,BrazilBrazil,BrasiliaBrasilia,415695056


In [59]:
# Call f on every individual element. For this f the result matches df.apply(f).
# NOTE(review): recent pandas releases deprecate applymap in favour of
# DataFrame.map — confirm against the pandas version in use before switching.
df.applymap(f)

Unnamed: 0,Country,Capital,Population
0,BelgiumBelgium,BrusselsBrussels,22381692
1,IndiaIndia,New DelhiNew Delhi,2606343670
2,BrazilBrazil,BrasiliaBrasilia,415695056


###### Veri Hizalama

In [61]:
# Second Series for the alignment examples; note it has no 'b' label.
s3 = pd.Series(data=[7, -2, 3], index=list('acd'))

In [63]:
# Show the left-hand operand of the alignment examples.
# NOTE(review): the recorded output shows s holding booleans, but no visible
# cell assigns them (s was last shown as 15, -5, 7, 4, and execution counts skip
# numbers here). The output depends on a cell that is no longer in the notebook
# — re-run from a fresh kernel to refresh it.
s

a    True
b    True
c    True
d    True
dtype: bool

In [64]:
# Show s3: it has labels 'a', 'c' and 'd' but no 'b'.
s3

a    7
c   -2
d    3
dtype: int64

In [65]:
# Arithmetic aligns the operands on index labels. 'b' exists only in s, so its
# result is NaN, and the result is float so that it can hold the NaN.
s+s3

a    8.0
b    NaN
c   -1.0
d    4.0
dtype: float64

In [None]:
# Label-aligned addition. The keyword is `fill_value` (singular): a label
# missing from one operand is treated as 8 instead of producing NaN.
s.add(s3, fill_value=8)

In [None]:
# Label-aligned subtraction. The keyword is `fill_value` (singular): a label
# missing from one operand is treated as 2 instead of producing NaN.
s.sub(s3, fill_value=2)

In [None]:
# Label-aligned division. The keyword is `fill_value` (singular): a label
# missing from one operand is treated as 3 instead of producing NaN.
s.div(s3, fill_value=3)

In [None]:
# Label-aligned multiplication. The method is `mul` (not `nul`) and the keyword
# is `fill_value` (singular): a label missing from one operand is treated as 4
# instead of producing NaN.
s.mul(s3, fill_value=4)