# Pandas'da Cokca Kullanilan Fonksiyon ve Metodlar

Bütün fonksiyon ve methodları görebilmek için bu [dökümantasyon](https://pandas.pydata.org/pandas-docs/stable/reference/index.html) incelenebilir.

* [apply() method](#apply_method)
* [apply() with a function](#apply_function)
* [apply() with a lambda expression](#apply_lambda)
* [apply() on multiple columns](#apply_multiple)
* [describe()](#describe)
* [sort_values()](#sort)
* [corr()](#corr)
* [idxmin and idxmax](#idx)
* [value_counts()](#v_c)
* [replace()](#replace)
* [unique and nunique()](#uni)
* [map()](#map)
* [duplicated and drop_duplicates()](#dup)
* [between()](#bet)
* [sample()](#sample)
* [nlargest()](#n)

In [84]:
import pandas as pd
import numpy as np

In [85]:
df_tips = pd.read_csv("tips.csv")
df_tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251
...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17


In [86]:
df_tips.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   total_bill        244 non-null    float64
 1   tip               244 non-null    float64
 2   sex               244 non-null    object 
 3   smoker            244 non-null    object 
 4   day               244 non-null    object 
 5   time              244 non-null    object 
 6   size              244 non-null    int64  
 7   price_per_person  244 non-null    float64
 8   Payer Name        244 non-null    object 
 9   CC Number         244 non-null    int64  
 10  Payment ID        244 non-null    object 
dtypes: float64(3), int64(2), object(6)
memory usage: 21.1+ KB


***.apply() metodu***

In [87]:
# bir series ya da dataframe'in sutunlarindaki her bir elemanina fonksiyon uygulamak icin kullanilir
# arkaplanda for dongusu kullanir

## 1. Yol

In [88]:
def son_4(sayi): # icine yazilan sayiyi stringe cevirip son 4 karakterini donduren fonksiyon
    return str(sayi)[-4:]

In [89]:
df_tips["CC Number"].apply(son_4) # gecici olarak son 4 karakteri gosterir

0      3410
1      9230
2      1322
3      5994
4      7221
       ... 
239    2842
240    5404
241    7196
242    0950
243    8139
Name: CC Number, Length: 244, dtype: object

In [90]:
df_tips["CC Number"] = df_tips["CC Number"].apply(son_4) # kalici olarak son 4 karakteri atar
df_tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,9230,Sun4608
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,1322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,5994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,7221,Sun2251
...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,2842,Sat2657
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,5404,Sat1766
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,7196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,0950,Sat17


## 2. Yol

In [91]:
df_tips = pd.read_csv("tips.csv")
df_tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251
...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17


In [92]:
df_tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251


In [93]:
df_tips["CC Number"].apply(lambda sayi : str(sayi)[-4:]) # ayni islemi lambda ile yapma

0      3410
1      9230
2      1322
3      5994
4      7221
       ... 
239    2842
240    5404
241    7196
242    0950
243    8139
Name: CC Number, Length: 244, dtype: object

In [94]:
df_tips["CC Number"] = df_tips["CC Number"].apply(son_4) # kalici olarak son 4 karakteri atar
df_tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,9230,Sun4608
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,1322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,5994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,7221,Sun2251
...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,2842,Sat2657
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,5404,Sat1766
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,7196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,0950,Sat17


## 3. Yol (Vektorize Edilmis)(En Hizli Yol)

In [95]:
df_tips = pd.read_csv("tips.csv")
df_tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251
...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17


In [96]:
df_tips["CC Number"]

0      3560325168603410
1      4478071379779230
2      6011812112971322
3      4676137647685994
4      4832732618637221
             ...       
239    5296068606052842
240    3506806155565404
241    6011891618747196
242       4375220550950
243    3511451626698139
Name: CC Number, Length: 244, dtype: int64

In [97]:
df_tips["CC Number"].astype(str).str[-4:] # vektorize edilmis hali, en hizli yol

0      3410
1      9230
2      1322
3      5994
4      7221
       ... 
239    2842
240    5404
241    7196
242    0950
243    8139
Name: CC Number, Length: 244, dtype: object

# lambda ile uygulanan yol

In [98]:
df_tips = pd.read_csv("tips.csv")
df_tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251
...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17


In [99]:
df_tips["fiyat_kategorisi"] = df_tips["total_bill"].apply(lambda x: "$" if x < 15 else ("$$" if x < 30 else "$$$"))
# total_bill degerine gore fiyat_kategorisi adinda yeni bir sutun ekler

In [100]:
df_tips.sample(10)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,fiyat_kategorisi
187,30.46,2.0,Male,Yes,Sun,Dinner,5,6.09,David Barrett,4792882899700988,Sun9987,$$$
190,15.69,1.5,Male,Yes,Sun,Dinner,2,7.84,Riley Barnes,180053549128800,Sun5104,$$
10,10.27,1.71,Male,No,Sun,Dinner,2,5.14,William Riley,566287581219,Sun2546,$
13,18.43,3.0,Male,No,Sun,Dinner,4,4.61,Joshua Jones,6011163105616890,Sun2971,$$
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17,$$
171,15.81,3.16,Male,Yes,Sat,Dinner,2,7.9,David Hall,502004138207,Sat6750,$$
219,30.14,3.09,Female,Yes,Sat,Dinner,4,7.54,Shelby House,502097403252,Sat8863,$$$
153,24.55,2.0,Male,No,Sun,Dinner,4,6.14,Todd Patterson,4416804908942159,Sun8670,$$
66,16.45,2.47,Female,No,Sat,Dinner,2,8.22,Rachel Vaughn,3569262692675583,Sat4750,$$
60,20.29,3.21,Male,Yes,Sat,Dinner,2,10.14,Anthony Mclean,347614304015027,Sat2353,$$


# vektorize sekilde uygulanan yol

In [101]:
df_tips = pd.read_csv("tips.csv")
df_tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251
...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17


In [102]:
def fiyat_kategori_vectorized(odeme): 
    kategori = np.full_like(odeme, "$", dtype = "<U3") # <U3 => unicode string 3 karaktere kadar
    
    kategori[(odeme > 10) & (odeme <= 30)] = "$$"
    
    kategori[odeme >= 30] = "$$$"
    
    return kategori

In [103]:
fiyat_kategori_vectorized(df_tips["total_bill"].values)

array(['$$', '$$', '$$', '$$', '$$', '$$', '$', '$$', '$$', '$$', '$$',
       '$$$', '$$', '$$', '$$', '$$', '$$', '$$', '$$', '$$', '$$', '$$',
       '$$', '$$$', '$$', '$$', '$$', '$$', '$$', '$$', '$', '$$', '$$',
       '$$', '$$', '$$', '$$', '$$', '$$', '$$$', '$$', '$$', '$$', '$',
       '$$$', '$$', '$$', '$$$', '$$', '$$', '$$', '$$', '$$$', '$', '$$',
       '$$', '$$$', '$$', '$$', '$$$', '$$', '$$', '$$', '$$', '$$', '$$',
       '$$', '$', '$$', '$$', '$$', '$$', '$$', '$$', '$$', '$$', '$$',
       '$$', '$$', '$$', '$$', '$$', '$$', '$$$', '$$', '$$$', '$$', '$$',
       '$$', '$$', '$$', '$$', '$', '$$', '$$', '$$$', '$$', '$$', '$$',
       '$$', '$$', '$$', '$$$', '$$', '$$', '$$', '$$', '$$', '$$', '$$',
       '$$', '$', '$$$', '$$', '$$', '$$', '$$', '$$', '$$', '$$', '$$',
       '$$', '$$', '$$', '$$', '$$', '$', '$$', '$$', '$$', '$$', '$$',
       '$$', '$$', '$$', '$', '$$', '$$', '$$', '$$', '$$', '$$$', '$$$',
       '$$', '$$', '$', '$$', '$$', '$', '$',

In [104]:
df_tips["fiyat_kategorisi_vektorize"] = fiyat_kategori_vectorized(df_tips["total_bill"].values)

In [105]:
df_tips.sample(10)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,fiyat_kategorisi_vektorize
68,20.23,2.01,Male,No,Sat,Dinner,2,10.12,Mr. Travis Bailey Jr.,60406789937,Sat561,$$
121,13.42,1.68,Female,No,Thur,Lunch,2,6.71,Laura Garcia,5181484390945653,Thur2158,$$
152,17.26,2.74,Male,No,Sun,Dinner,3,5.75,Gregory Smith,4292362333741,Sun5205,$$
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,$$
145,8.35,1.5,Female,No,Thur,Lunch,2,4.18,Amy Young,4285454264477,Thur9331,$
226,10.09,2.0,Female,Yes,Fri,Lunch,2,5.04,Ruth Weiss,5268689490381635,Fri6359,$$
6,8.77,2.0,Male,No,Sun,Dinner,2,4.38,Kristopher Johnson,2223727524230344,Sun5985,$
20,17.92,4.08,Male,No,Sat,Dinner,2,8.96,Thomas Rice,4403296224639756,Sat1709,$$
147,11.87,1.63,Female,No,Thur,Lunch,2,5.94,Annette Cunningham,675937746864,Thur4780,$$
163,13.81,2.0,Male,No,Sun,Dinner,2,6.9,Charles Newton,5552793481414044,Sun8594,$$


## .apply() birden fazla sutuna uygulanabilir

In [106]:
df_tips = pd.read_csv("tips.csv")
df_tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251
...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17


In [107]:
df_tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251


In [108]:
def dort_ekle(sayi):
    return sayi + 4

In [109]:
df_tips[["total_bill", "tip"]].apply(dort_ekle)

Unnamed: 0,total_bill,tip
0,20.99,5.01
1,14.34,5.66
2,25.01,7.50
3,27.68,7.31
4,28.59,7.61
...,...,...
239,33.03,9.92
240,31.18,6.00
241,26.67,6.00
242,21.82,5.75


In [110]:
# lambda ile ayni islem
df_tips[["total_bill", "tip"]].apply(lambda x : x + 4)

Unnamed: 0,total_bill,tip
0,20.99,5.01
1,14.34,5.66
2,25.01,7.50
3,27.68,7.31
4,28.59,7.61
...,...,...
239,33.03,9.92
240,31.18,6.00
241,26.67,6.00
242,21.82,5.75


In [111]:
df_tips.describe()

Unnamed: 0,total_bill,tip,size,price_per_person,CC Number
count,244.0,244.0,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672,7.888197,2563496000000000.0
std,8.902412,1.383638,0.9511,2.914234,2369340000000000.0
min,3.07,1.0,1.0,2.88,60406790000.0
25%,13.3475,2.0,2.0,5.8,30407310000000.0
50%,17.795,2.9,2.0,7.255,3525318000000000.0
75%,24.1275,3.5625,3.0,9.39,4553675000000000.0
max,50.81,10.0,6.0,20.27,6596454000000000.0


In [112]:
df_tips.sort_values(by = "total_bill", ascending = False) # total_bill sutununa gore buyukten kucuge siralamak (gecici)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
170,50.81,10.00,Male,Yes,Sat,Dinner,3,16.94,Gregory Clark,5473850968388236,Sat1954
212,48.33,9.00,Male,No,Sat,Dinner,4,12.08,Alex Williamson,676218815212,Sat4590
59,48.27,6.73,Male,No,Sat,Dinner,4,12.07,Brian Ortiz,6596453823950595,Sat8139
156,48.17,5.00,Male,No,Sun,Dinner,6,8.03,Ryan Gonzales,3523151482063321,Sun7518
182,45.35,3.50,Male,Yes,Sun,Dinner,3,15.12,Jose Parsons,4112207559459910,Sun2337
...,...,...,...,...,...,...,...,...,...,...,...
149,7.51,2.00,Male,No,Thur,Lunch,2,3.76,Daniel Robbins,4823139288341889,Thur6321
111,7.25,1.00,Female,No,Sat,Dinner,1,7.25,Terri Jones,3559221007826887,Sat4801
172,7.25,5.15,Male,Yes,Sun,Dinner,2,3.62,Larry White,30432617123103,Sun9209
92,5.75,1.00,Female,Yes,Fri,Dinner,2,2.88,Leah Ramirez,3508911676966392,Fri3780


In [113]:
df_tips.sort_values(by = "total_bill", ascending = True) # total_bill sutununa gore kucukten buyuge siralamak (gecici)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
67,3.07,1.00,Female,Yes,Sat,Dinner,1,3.07,Tiffany Brock,4359488526995267,Sat3455
92,5.75,1.00,Female,Yes,Fri,Dinner,2,2.88,Leah Ramirez,3508911676966392,Fri3780
111,7.25,1.00,Female,No,Sat,Dinner,1,7.25,Terri Jones,3559221007826887,Sat4801
172,7.25,5.15,Male,Yes,Sun,Dinner,2,3.62,Larry White,30432617123103,Sun9209
149,7.51,2.00,Male,No,Thur,Lunch,2,3.76,Daniel Robbins,4823139288341889,Thur6321
...,...,...,...,...,...,...,...,...,...,...,...
182,45.35,3.50,Male,Yes,Sun,Dinner,3,15.12,Jose Parsons,4112207559459910,Sun2337
156,48.17,5.00,Male,No,Sun,Dinner,6,8.03,Ryan Gonzales,3523151482063321,Sun7518
59,48.27,6.73,Male,No,Sat,Dinner,4,12.07,Brian Ortiz,6596453823950595,Sat8139
212,48.33,9.00,Male,No,Sat,Dinner,4,12.08,Alex Williamson,676218815212,Sat4590


In [114]:
df_tips.sort_values(by = "total_bill", ascending = True).reset_index() # indexleri de yeniden duzenleyerek siralama yapar
# reset_index ile eski indexler yeni bir sutun olarak eklenir, drop = True ile eski indexler eklenmez

Unnamed: 0,index,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,67,3.07,1.00,Female,Yes,Sat,Dinner,1,3.07,Tiffany Brock,4359488526995267,Sat3455
1,92,5.75,1.00,Female,Yes,Fri,Dinner,2,2.88,Leah Ramirez,3508911676966392,Fri3780
2,111,7.25,1.00,Female,No,Sat,Dinner,1,7.25,Terri Jones,3559221007826887,Sat4801
3,172,7.25,5.15,Male,Yes,Sun,Dinner,2,3.62,Larry White,30432617123103,Sun9209
4,149,7.51,2.00,Male,No,Thur,Lunch,2,3.76,Daniel Robbins,4823139288341889,Thur6321
...,...,...,...,...,...,...,...,...,...,...,...,...
239,182,45.35,3.50,Male,Yes,Sun,Dinner,3,15.12,Jose Parsons,4112207559459910,Sun2337
240,156,48.17,5.00,Male,No,Sun,Dinner,6,8.03,Ryan Gonzales,3523151482063321,Sun7518
241,59,48.27,6.73,Male,No,Sat,Dinner,4,12.07,Brian Ortiz,6596453823950595,Sat8139
242,212,48.33,9.00,Male,No,Sat,Dinner,4,12.08,Alex Williamson,676218815212,Sat4590


In [115]:
df_tips.sort_values(by = ["tip", "total_bill"], ascending = True).reset_index(drop = True) 
# once tip sonra total_bill sutununa gore siralama yapar
# tip ayni olanlar arasinda total_bill e gore siralama yapar
# drop = True ile eski indexler eklenmez

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,3.07,1.00,Female,Yes,Sat,Dinner,1,3.07,Tiffany Brock,4359488526995267,Sat3455
1,5.75,1.00,Female,Yes,Fri,Dinner,2,2.88,Leah Ramirez,3508911676966392,Fri3780
2,7.25,1.00,Female,No,Sat,Dinner,1,7.25,Terri Jones,3559221007826887,Sat4801
3,12.60,1.00,Male,Yes,Sat,Dinner,2,6.30,Matthew Myers,3543676378973965,Sat5032
4,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
...,...,...,...,...,...,...,...,...,...,...,...
239,34.30,6.70,Male,No,Thur,Lunch,6,5.72,Steven Carlson,3526515703718508,Thur1025
240,48.27,6.73,Male,No,Sat,Dinner,4,12.07,Brian Ortiz,6596453823950595,Sat8139
241,39.42,7.58,Male,No,Sat,Dinner,4,9.86,Lance Peterson,3542584061609808,Sat239
242,48.33,9.00,Male,No,Sat,Dinner,4,12.08,Alex Williamson,676218815212,Sat4590


In [116]:
df_tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251


In [117]:
df_tips["price_per_person"].max() # price_per_person sutunundaki en yuksek degeri dondurur

np.float64(20.27)

In [118]:
df_tips["price_per_person"].argmax() # price_per_person sutunundaki en yuksek degerin indexini dondurur

np.int64(184)

In [119]:
df_tips["price_per_person"].idxmax() # price_per_person sutunundaki en yuksek degerin indexini dondurur

184

In [120]:
df_tips["price_per_person"].min() # price_per_person sutunundaki en dusuk degeri dondurur

np.float64(2.88)

In [121]:
df_tips.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   total_bill        244 non-null    float64
 1   tip               244 non-null    float64
 2   sex               244 non-null    object 
 3   smoker            244 non-null    object 
 4   day               244 non-null    object 
 5   time              244 non-null    object 
 6   size              244 non-null    int64  
 7   price_per_person  244 non-null    float64
 8   Payer Name        244 non-null    object 
 9   CC Number         244 non-null    int64  
 10  Payment ID        244 non-null    object 
dtypes: float64(3), int64(2), object(6)
memory usage: 21.1+ KB


In [122]:
df_tips.corr(numeric_only = True) # sayisal sutunlar arasindaki korelasyon katsayilarini dondurur
# makine ogrenmesi ve istatistikte degiskenler arasindaki iliskiyi anlamak icin kullanilir, onemlidir

Unnamed: 0,total_bill,tip,size,price_per_person,CC Number
total_bill,1.0,0.675734,0.598315,0.647554,0.104576
tip,0.675734,1.0,0.489299,0.347405,0.110857
size,0.598315,0.489299,1.0,-0.175359,-0.030239
price_per_person,0.647554,0.347405,-0.175359,1.0,0.13524
CC Number,0.104576,0.110857,-0.030239,0.13524,1.0


In [123]:
df_tips["sex"].value_counts() # herhangi bir sutundaki benzersiz degerlerin sayisini dondurur

sex
Male      157
Female     87
Name: count, dtype: int64

In [124]:
df_tips.day.value_counts() # day sutunundaki benzersiz degerlerin sayisini dondurur

day
Sat     87
Sun     76
Thur    62
Fri     19
Name: count, dtype: int64

In [125]:
df_tips.total_bill.sum() # total_bill sutunundaki degerlerin toplamini dondurur

np.float64(4827.77)

In [126]:
df_tips.tip.mean() # tip sutunundaki degerlerin ortalamasini dondurur

np.float64(2.99827868852459)

In [127]:
df_tips.day.unique() # day sutunundaki benzersiz degerleri dondurur

array(['Sun', 'Sat', 'Thur', 'Fri'], dtype=object)

In [128]:
df_tips.day.nunique() # day sutunundaki benzersiz degerlerin sayisini dondurur

4

# Sex Sutunundaki "Male" olanlari "M", "Female" olanlari "F" haline getirmek

### 1. Yol (.apply())

In [129]:
df_tips["sex"].apply(lambda x : "M" if x == "Male" else "F") # "Male" olanlari "M", "Female" olanlari "F"

0      F
1      M
2      M
3      M
4      F
      ..
239    M
240    F
241    M
242    M
243    F
Name: sex, Length: 244, dtype: object

### 2. Yol (.replace())

In [130]:
df_tips.sex.replace(["Male", "Female"], ["M", "F"], inplace = False)
# iki deger verdigimizde liste icinde vermemiz gerekir, ilk liste icindeki degerler ikinci liste icindeki degerlerle degistirilir

0      F
1      M
2      M
3      M
4      F
      ..
239    M
240    F
241    M
242    M
243    F
Name: sex, Length: 244, dtype: object

### 3. Yol (.map())

In [131]:
df_tips.sex.map({"Male" : "M", "Female" : "F"})
# key-value seklinde sozluk yapisi kullanilir

0      F
1      M
2      M
3      M
4      F
      ..
239    M
240    F
241    M
242    M
243    F
Name: sex, Length: 244, dtype: object

In [132]:
df_tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251
...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17
