In [31]:
import seaborn as sns
import pandas as pd
import numpy as np

# Load seaborn's example 'planets' dataset into a DataFrame and preview its first rows.
planets = sns.load_dataset('planets')
planets.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [7]:
# Work on a copy so that the originally loaded dataset is left unmodified.
df = planets.copy()
df.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [8]:
# Structural information about the dataset: index range, columns,
# non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1035 entries, 0 to 1034
Data columns (total 6 columns):
method            1035 non-null object
number            1035 non-null int64
orbital_period    992 non-null float64
mass              513 non-null float64
distance          808 non-null float64
year              1035 non-null int64
dtypes: float64(3), int64(2), object(1)
memory usage: 48.6+ KB


In [9]:
# Direct access to the dtype of each variable (column) in the dataset.
df.dtypes

method             object
number              int64
orbital_period    float64
mass              float64
distance          float64
year                int64
dtype: object

In [10]:
# Note: it is useful to convert object-dtype variables to categorical variables.

In [11]:
# Convert the object-dtype 'method' column to the categorical dtype.
# Bracket indexing is used instead of attribute access (df.method = ...):
# attribute-style assignment only updates a column when it already exists and
# its name does not clash with a DataFrame attribute, so it is fragile.
df['method'] = df['method'].astype('category')
df.dtypes

method            category
number               int64
orbital_period     float64
mass               float64
distance           float64
year                 int64
dtype: object

In [15]:
df.head(10)  # Show the first n rows of the dataset (here n = 10).

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009
5,Radial Velocity,1,185.84,4.8,76.39,2008
6,Radial Velocity,1,1773.4,4.64,18.15,2002
7,Radial Velocity,1,798.5,,21.41,1996
8,Radial Velocity,1,993.3,10.3,73.1,2008
9,Radial Velocity,2,452.8,1.99,74.79,2010


In [16]:
df.tail(3)  # Show the last n rows (observations) of the dataset (here n = 3).

Unnamed: 0,method,number,orbital_period,mass,distance,year
1032,Transit,1,3.191524,,174.0,2007
1033,Transit,1,4.125083,,293.0,2008
1034,Transit,1,4.187757,,260.0,2008


In [17]:
# Dimensions of the dataset as (number of rows, number of columns).
df.shape

(1035, 6)

In [19]:
# Column (variable) names of the dataset.
df.columns

Index(['method', 'number', 'orbital_period', 'mass', 'distance', 'year'], dtype='object')

In [21]:
df.describe().T
# Note: by default describe() summarizes only the numeric columns, so the
# categorical 'method' column does not appear in this table.

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,1035.0,1.785507,1.240976,1.0,1.0,1.0,2.0,7.0
orbital_period,992.0,2002.917596,26014.728304,0.090706,5.44254,39.9795,526.005,730000.0
mass,513.0,2.638161,3.818617,0.0036,0.229,1.26,3.04,25.0
distance,808.0,264.069282,733.116493,1.35,32.56,55.25,178.5,8500.0
year,1035.0,2009.070531,3.972567,1989.0,2007.0,2010.0,2012.0,2014.0


In [23]:
# Include the categorical variables in the summary as well.
df.describe(include = 'all' ).T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
method,1035,10.0,Radial Velocity,553.0,,,,,,,
number,1035,,,,1.78551,1.24098,1.0,1.0,1.0,2.0,7.0
orbital_period,992,,,,2002.92,26014.7,0.0907063,5.44254,39.9795,526.005,730000.0
mass,513,,,,2.63816,3.81862,0.0036,0.229,1.26,3.04,25.0
distance,808,,,,264.069,733.116,1.35,32.56,55.25,178.5,8500.0
year,1035,,,,2009.07,3.97257,1989.0,2007.0,2010.0,2012.0,2014.0


In [25]:
df.nunique()
# Number of unique values found in each variable (column).

method             10
number              7
orbital_period    988
mass              381
distance          552
year               23
dtype: int64

In [26]:
df.isnull().values.any()  # Check whether any value in the dataset is missing.

True

In [28]:
df.isnull().sum()  # Number of missing observations in each variable (column).

method              0
number              0
orbital_period     43
mass              522
distance          227
year                0
dtype: int64

In [29]:
# Replace the missing observations of 'orbital_period' with 0.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on df['orbital_period']: that in-place call is a
# chained assignment, which recent pandas versions warn about and which leaves
# df unchanged when Copy-on-Write is enabled.
# NOTE(review): 0 is only a placeholder value here; confirm it is acceptable
# for any statistics computed later on this column.
df['orbital_period'] = df['orbital_period'].fillna(0)

In [30]:
# Re-count the missing observations per column.
df.isnull().sum()

method              0
number              0
orbital_period      0
mass              522
distance          227
year                0
dtype: int64

In [34]:
# Fill the missing observations of 'mass' with the column mean.
# Series.mean() skips NaN by default, so the mean is taken over the observed
# values only. The result is assigned back to the column rather than using
# fillna(..., inplace=True) on df['mass'], because that chained in-place call
# triggers warnings in recent pandas and does not update df under Copy-on-Write.
df['mass'] = df['mass'].fillna(df['mass'].mean())

In [33]:
# Re-count the missing observations per column.
df.isnull().sum()

method              0
number              0
orbital_period      0
mass                0
distance          227
year                0
dtype: int64