# Import Libraries

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Uncomment to browse the Series documentation: help(pd.Series)

# From list to pd.Series

In [3]:
# Start from a plain Python list of three integers.
mylist = [1, 2, 3]
print(mylist)

[1, 2, 3]


In [4]:
# Convert the list into a one-dimensional NumPy array.
myarray = np.array(mylist)
print(myarray)

[1 2 3]


In [5]:
# Wrap the array in a Series and label its three values 'a', 'b' and 'c'.
myseries = pd.Series(data=myarray, index=list("abc"))
print(myseries)

a    1
b    2
c    3
dtype: int32


In [6]:
# One more example: a list of ages to be labelled with names in the next cell.
age = [23, 25, 22, 21]

In [7]:
# Build a Series from the list, using the people's names as the index labels.
pd.Series(age, index=["Michael", "Mike", "Jill", "Jennifer"])

Michael     23
Mike        25
Jill        22
Jennifer    21
dtype: int64

# From dict to pd.Series

In [8]:
# Name -> age mapping; insertion order is preserved and becomes the Series index order.
age_dict = dict(Michael=23, Mike=25, Jill=22, Jennifer=21)

In [9]:
# A dict converts directly to a Series: keys become the index, values the data.
age_series = pd.Series(age_dict)

In [10]:
# Show the resulting Series (labels, values and dtype).
print(age_series)

Michael     23
Mike        25
Jill        22
Jennifer    21
dtype: int64


In [11]:
# Look up a single value by its index label.
age_series["Jennifer"]

21

# From list and index to pd.Series

In [12]:
# Parallel lists: grades[i] belongs to names[i].
grades = [77, 80, 54, 66]
names = ["Seda", "Ahmet", "Ceren", "Serkan"]

In [13]:
# Combine the two lists into one Series: grades as data, names as index labels.
grades_series = pd.Series(data=grades, index=names)
print(grades_series)

Seda      77
Ahmet     80
Ceren     54
Serkan    66
dtype: int64


In [14]:
# Add a 5-point bonus to every grade (the scalar is broadcast over the Series).
# The Series is rebuilt from `grades`/`names` instead of using `+=` so the cell
# is idempotent: re-running it no longer adds another 5 points each time.
grades_series = pd.Series(data=grades, index=names) + 5

In [15]:
# Show the grades after the bonus has been applied.
print(grades_series)

Seda      82
Ahmet     85
Ceren     59
Serkan    71
dtype: int64


In [16]:
# Look up one student's grade by index label.
grades_series["Ceren"]

59

In [17]:
# Index labels of the Series (keys() returns its Index object).
grades_series.keys()

Index(['Seda', 'Ahmet', 'Ceren', 'Serkan'], dtype='object')

In [18]:
# Underlying data of the Series as a NumPy array (labels are dropped).
grades_series.values

array([82, 85, 59, 71], dtype=int64)

In [19]:
# Confirm that the object is a pandas Series.
type(grades_series)

pandas.core.series.Series

# Pandas DataFrame

In [20]:
# Uncomment to browse the DataFrame documentation: help(pd.DataFrame)

In [21]:
# Random temperatures: a 4x4 grid of integers drawn from [5, 25).
# Seeding the global NumPy generator first makes the draw repeatable.
np.random.seed(17)
temp = np.random.randint(5, 25, size=(4, 4))
print(temp)

[[20 22 20 11]
 [21 18  9 12]
 [17 12  6 19]
 [22 14 15  7]]


In [22]:
# Labels for the 4x4 temperature grid: four column labels and four row labels
# (presumably Turkish month and city abbreviations).
mycols = ["Oca", "Şub", "Mar", "Nis"]
myrows = ["Ank", "İst", "İzm", "Ant"]

In [23]:
pd.DataFrame(temp)         # the 2-D NumPy array converted to a DataFrame (default integer labels)

Unnamed: 0,0,1,2,3
0,20,22,20,11
1,21,18,9,12
2,17,12,6,19
3,22,14,15,7


In [24]:
pd.DataFrame(temp, columns=mycols)   # DataFrame built with column names only

Unnamed: 0,Oca,Şub,Mar,Nis
0,20,22,20,11
1,21,18,9,12
2,17,12,6,19
3,22,14,15,7


In [25]:
pd.DataFrame(temp, index=myrows)     # DataFrame built with row labels only

Unnamed: 0,0,1,2,3
Ank,20,22,20,11
İst,21,18,9,12
İzm,17,12,6,19
Ant,22,14,15,7


In [26]:
pd.DataFrame(temp, index=myrows, columns=mycols)  # DataFrame built with both row and column labels

Unnamed: 0,Oca,Şub,Mar,Nis
Ank,20,22,20,11
İst,21,18,9,12
İzm,17,12,6,19
Ant,22,14,15,7


In [27]:
pd.DataFrame(None, index=myrows, columns=mycols)   # an empty DataFrame: labels only, no data

Unnamed: 0,Oca,Şub,Mar,Nis
Ank,,,,
İst,,,,
İzm,,,,
Ant,,,,


In [28]:
df = pd.DataFrame(temp, columns=mycols, index=myrows)  # keep the fully labelled DataFrame as df

In [29]:
# Display the labelled temperature DataFrame.
df

Unnamed: 0,Oca,Şub,Mar,Nis
Ank,20,22,20,11
İst,21,18,9,12
İzm,17,12,6,19
Ant,22,14,15,7


In [30]:
# Confirm that the object is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

## Pandas DataFrame from Dictionary

In [31]:
# Column specification mixing scalars and array-likes. When the DataFrame is
# built, the scalars are repeated for each of the four rows labelled by "C".
d = dict(
    A=1.0,
    B=pd.Timestamp("20230507"),
    C=pd.Series(1, index=list("abcd"), dtype="float32"),
    D=np.full(4, 3),
    E=pd.Categorical(["test", "train"] * 2),
    F="text",
    G=np.nan,
)

In [32]:
# Build a DataFrame from the dict; each key becomes a column.
# Note: this rebinds `df`, replacing the DataFrame assigned earlier.
df = pd.DataFrame(data=d)

In [33]:
# Display the DataFrame built from the dict.
df

Unnamed: 0,A,B,C,D,E,F,G
a,1.0,2023-05-07,1.0,3,test,text,
b,1.0,2023-05-07,1.0,3,train,text,
c,1.0,2023-05-07,1.0,3,test,text,
d,1.0,2023-05-07,1.0,3,train,text,


In [34]:
# Column-oriented records: every key is a column name mapped to a list of
# three values, one per person.
mydict = dict(
    names=["Kobe", "Michael", "Dirk"],
    age=[42, 45, 47],
    gender=["M"] * 3,
    alive=[False, True, True],
)

In [36]:
# Build the DataFrame from the dict of lists and attach custom row labels.
# Note: this rebinds `df` again; the cells below operate on this DataFrame.
df = pd.DataFrame(data=mydict, index=["Kob", "Mic", "Dir"])
df

Unnamed: 0,names,age,gender,alive
Kob,Kobe,42,M,False
Mic,Michael,45,M,True
Dir,Dirk,47,M,True


In [37]:
# Data type of each column.
df.dtypes

names     object
age        int64
gender    object
alive       bool
dtype: object

In [38]:
# Summary of the DataFrame: index, columns, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, Kob to Dir
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   names   3 non-null      object
 1   age     3 non-null      int64 
 2   gender  3 non-null      object
 3   alive   3 non-null      bool  
dtypes: bool(1), int64(1), object(2)
memory usage: 99.0+ bytes


In [39]:
# The whole table is a DataFrame ...
type(df)

pandas.core.frame.DataFrame

In [41]:
# Select a single column with bracket notation.
df["alive"]

Kob    False
Mic     True
Dir     True
Name: alive, dtype: bool

In [42]:
# ... while a single selected column is a Series.
type(df["alive"])

pandas.core.series.Series

In [43]:
# The same column through attribute access (here it gives the same result as df["alive"]).
df.alive

Kob    False
Mic     True
Dir     True
Name: alive, dtype: bool

In [45]:
# Select the numeric "age" column.
df["age"]

Kob    42
Mic    45
Dir    47
Name: age, dtype: int64

In [48]:
# Arithmetic mean of the ages.
df["age"].mean()

44.666666666666664

In [49]:
# Median of the ages, using attribute access to reach the column.
df.age.median()

45.0

## Reading a CSV file (from a URL and from local disk)

In [52]:
# read_csv also accepts a URL; the result is displayed here but not stored in a variable.
pd.read_csv("https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv")

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [55]:
# Load the tips dataset from a CSV file on the local disk.
# A fresh environment may not have tips.csv next to the notebook, so fall back
# to the public copy (the URL read in the previous cell) instead of failing
# with FileNotFoundError.
# NOTE(review): assumes the local file matches the seaborn-data copy — confirm.
tips_local = Path("tips.csv")
tips_source = (
    tips_local
    if tips_local.exists()
    else "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv"
)
tips = pd.read_csv(tips_source)
tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [56]:
# Select the "total_bill" column as a Series.
tips["total_bill"]

0      16.99
1      10.34
2      21.01
3      23.68
4      24.59
       ...  
239    29.03
240    27.18
241    22.67
242    17.82
243    18.78
Name: total_bill, Length: 244, dtype: float64

In [57]:
# Average bill amount across all rows.
tips["total_bill"].mean()

19.78594262295082

In [58]:
# Select the column named "size"; bracket notation is required for this column (see next cell).
tips["size"]

0      2
1      3
2      3
3      2
4      4
      ..
239    3
240    2
241    2
242    2
243    2
Name: size, Length: 244, dtype: int64

In [59]:
# Caution: attribute access resolves to DataFrame.size (total number of cells,
# rows x columns), not the "size" column; use tips["size"] for the column.
tips.size

1708

In [62]:
# First 10 rows of the dataset.
tips.head(10)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4
6,8.77,2.0,Male,No,Sun,Dinner,2
7,26.88,3.12,Male,No,Sun,Dinner,4
8,15.04,1.96,Male,No,Sun,Dinner,2
9,14.78,3.23,Male,No,Sun,Dinner,2


In [63]:
# Ten randomly chosen rows. A fixed random_state makes the cell show the same
# rows on every run instead of depending on the current global RNG state.
tips.sample(10, random_state=17)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
93,16.32,4.3,Female,Yes,Fri,Dinner,2
172,7.25,5.15,Male,Yes,Sun,Dinner,2
97,12.03,1.5,Male,Yes,Fri,Dinner,2
11,35.26,5.0,Female,No,Sun,Dinner,4
100,11.35,2.5,Female,Yes,Fri,Dinner,2
81,16.66,3.4,Male,No,Thur,Lunch,2
37,16.93,3.07,Female,No,Sat,Dinner,3
118,12.43,1.8,Female,No,Thur,Lunch,2
209,12.76,2.23,Female,Yes,Sat,Dinner,2
226,10.09,2.0,Female,Yes,Fri,Lunch,2


In [64]:
# Last rows of the dataset (tail() defaults to 5 rows).
tips.tail()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.0,Female,Yes,Sat,Dinner,2
241,22.67,2.0,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2
243,18.78,3.0,Female,No,Thur,Dinner,2


In [66]:
# Number of rows for each distinct value of "time".
tips["time"].value_counts()

time
Dinner    176
Lunch      68
Name: count, dtype: int64

In [67]:
# Number of rows for each day, most frequent first.
tips["day"].value_counts()

day
Sat     87
Sun     76
Thur    62
Fri     19
Name: count, dtype: int64

In [68]:
# Number of rows for smokers versus non-smokers.
tips["smoker"].value_counts()

smoker
No     151
Yes     93
Name: count, dtype: int64

In [69]:
# Descriptive statistics for the numeric columns.
tips.describe()

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672
std,8.902412,1.383638,0.9511
min,3.07,1.0,1.0
25%,13.3475,2.0,2.0
50%,17.795,2.9,2.0
75%,24.1275,3.5625,3.0
max,50.81,10.0,6.0


In [73]:
# Rows whose total_bill lies strictly between 30 and 35 (both bounds excluded).
tips[tips["total_bill"].between(30, 35, inclusive="neither")]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
39,31.27,5.0,Male,No,Sat,Dinner,3
44,30.4,5.6,Male,No,Sun,Dinner,4
47,32.4,6.0,Male,No,Sun,Dinner,4
52,34.81,5.2,Female,No,Sun,Dinner,4
83,32.68,5.0,Male,Yes,Thur,Lunch,2
85,34.83,5.17,Female,No,Thur,Lunch,4
141,34.3,6.7,Male,No,Thur,Lunch,6
167,31.71,4.5,Male,No,Sun,Dinner,4
173,31.85,3.18,Male,Yes,Sun,Dinner,2
175,32.9,3.11,Male,Yes,Sun,Dinner,2


In [77]:
# Count the Friday rows by filtering first and reading the row count of the result.
tips[tips["day"] == "Fri"].shape[0]

19

In [80]:
# Same count without building a filtered DataFrame: each True in the mask counts as 1.
tips["day"].eq("Fri").sum()

19

In [83]:
# The 10 rows with the highest total_bill, largest first.
tips.nlargest(10, columns="total_bill")

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
170,50.81,10.0,Male,Yes,Sat,Dinner,3
212,48.33,9.0,Male,No,Sat,Dinner,4
59,48.27,6.73,Male,No,Sat,Dinner,4
156,48.17,5.0,Male,No,Sun,Dinner,6
182,45.35,3.5,Male,Yes,Sun,Dinner,3
102,44.3,2.5,Female,Yes,Sat,Dinner,3
197,43.11,5.0,Female,Yes,Thur,Lunch,4
142,41.19,5.0,Male,No,Thur,Lunch,5
184,40.55,3.0,Male,Yes,Sun,Dinner,2
95,40.17,4.73,Male,Yes,Fri,Dinner,4


In [84]:
# The 5 rows with the lowest total_bill, smallest first.
tips.nsmallest(5, columns="total_bill")

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
67,3.07,1.0,Female,Yes,Sat,Dinner,1
92,5.75,1.0,Female,Yes,Fri,Dinner,2
111,7.25,1.0,Female,No,Sat,Dinner,1
172,7.25,5.15,Male,Yes,Sun,Dinner,2
149,7.51,2.0,Male,No,Thur,Lunch,2


## Demo

In [85]:
import seaborn as sns

In [86]:
# List the names of the example datasets that seaborn can load.
sns.get_dataset_names()

['anagrams',
 'anscombe',
 'attention',
 'brain_networks',
 'car_crashes',
 'diamonds',
 'dots',
 'dowjones',
 'exercise',
 'flights',
 'fmri',
 'geyser',
 'glue',
 'healthexp',
 'iris',
 'mpg',
 'penguins',
 'planets',
 'seaice',
 'taxis',
 'tips',
 'titanic']

In [88]:
# Load the "mpg" example dataset as a DataFrame.
mpg = sns.load_dataset("mpg")

In [90]:
# Ten randomly chosen rows. A fixed random_state makes the cell show the same
# rows on every run instead of depending on the current global RNG state.
mpg.sample(10, random_state=17)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
196,24.5,4,98.0,60.0,2164,22.1,76,usa,chevrolet woody
291,19.2,8,267.0,125.0,3605,15.0,79,usa,chevrolet malibu classic (sw)
269,30.9,4,105.0,75.0,2230,14.5,78,usa,dodge omni
76,18.0,4,121.0,112.0,2933,14.5,72,europe,volvo 145e (sw)
263,17.7,6,231.0,165.0,3445,13.4,78,usa,buick regal sport coupe (turbo)
29,27.0,4,97.0,88.0,2130,14.5,71,japan,datsun pl510
255,25.1,4,140.0,88.0,2720,15.4,78,usa,ford fairmont (man)
9,15.0,8,390.0,190.0,3850,8.5,70,usa,amc ambassador dpl
209,19.0,4,120.0,88.0,3270,21.9,76,europe,peugeot 504
352,29.9,4,98.0,65.0,2380,20.7,81,usa,ford escort 2h


1. Bu verisetinde kaç satır ve sütun vardır?

In [91]:
# info() reports the number of entries (rows) and columns, plus non-null counts per column.
mpg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    392 non-null    float64
 4   weight        398 non-null    int64  
 5   acceleration  398 non-null    float64
 6   model_year    398 non-null    int64  
 7   origin        398 non-null    object 
 8   name          398 non-null    object 
dtypes: float64(4), int64(3), object(2)
memory usage: 28.1+ KB


In [92]:
# (rows, columns) tuple: the direct answer to the question above.
mpg.shape

(398, 9)

2. "mpg" sütununun ortalaması nedir?

In [94]:
# Mean of the "mpg" column computed directly on the Series.
mpg["mpg"].mean()

23.514572864321607

In [99]:
# Same value read from the summary table: row "mean", column "mpg".
mpg.describe().loc["mean", "mpg"]

23.514572864321607

3. Kaç farklı "cylinders" vardır?

In [100]:
# Distinct cylinder counts, in order of first appearance.
mpg["cylinders"].unique()

array([8, 4, 6, 3, 5], dtype=int64)

In [101]:
# How many cars have each cylinder count, most frequent first.
mpg["cylinders"].value_counts()

cylinders
4    204
8    103
6     84
3      4
5      3
Name: count, dtype: int64

4. "chevy s-10" isimli arabanın "horsepower" değeri nedir?

In [110]:
# Horsepower of the car named "chevy s-10".
# A single .loc[row_mask, column] selection replaces the chained
# mpg[mask]["horsepower"] lookup: it picks rows and column in one step and
# avoids building an intermediate filtered DataFrame.
mpg.loc[mpg["name"] == "chevy s-10", "horsepower"]

397    82.0
Name: horsepower, dtype: float64

## Boolean Indexing (Conditional Filtering)

Origini USA olan ve horsepower'ı 120'nin üzerinde olan araçların tanımlayıcı istatistikleri nelerdir?

In [118]:
# Show floating-point numbers with 3 decimal places in rendered output.
pd.options.display.precision = 3

In [119]:
# Descriptive statistics for cars of US origin with more than 120 horsepower.
# Rows with a missing horsepower compare as False and are therefore excluded.
mpg[mpg["origin"].eq("usa") & mpg["horsepower"].gt(120)].describe()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year
count,98.0,98.0,98.0,98.0,98.0,98.0,98.0
mean,14.653,7.98,347.663,161.48,4137.837,12.701,73.714
std,2.379,0.202,45.995,26.018,441.676,1.851,2.936
min,9.0,6.0,231.0,125.0,3086.0,8.0,70.0
25%,13.0,8.0,307.0,145.0,3832.5,11.5,71.25
50%,14.0,8.0,350.0,150.0,4159.5,13.0,73.0
75%,16.0,8.0,377.25,175.0,4424.25,13.925,76.0
max,23.0,8.0,455.0,230.0,5140.0,18.5,79.0


### isin()

In [None]:
# Filter the cars whose origin is Europe or Japan

In [121]:
# Keep the rows whose origin is one of the listed values (membership test via isin).
mpg[mpg["origin"].isin(["europe", "japan"])]

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
14,24.0,4,113.0,95.0,2372,15.0,70,japan,toyota corona mark ii
18,27.0,4,97.0,88.0,2130,14.5,70,japan,datsun pl510
19,26.0,4,97.0,46.0,1835,20.5,70,europe,volkswagen 1131 deluxe sedan
20,25.0,4,110.0,87.0,2672,17.5,70,europe,peugeot 504
21,24.0,4,107.0,90.0,2430,14.5,70,europe,audi 100 ls
...,...,...,...,...,...,...,...,...,...
383,38.0,4,91.0,67.0,1965,15.0,82,japan,honda civic
384,32.0,4,91.0,67.0,1965,15.7,82,japan,honda civic (auto)
385,38.0,4,91.0,67.0,1995,16.2,82,japan,datsun 310 gx
390,32.0,4,144.0,96.0,2665,13.9,82,japan,toyota celica gt


### Reset Index

### Drop a row

### Drop a column