In [1]:
# Imports
import pandas as pd
import numpy as np

In [12]:
# Build a small labelled frame from nested lists.
# The first row has only two values, so its 'Col C' entry comes out as NaN.
rows = [[1, 2], [4, 5, 6]]
df = pd.DataFrame(
    data=rows,
    index=['Row 1', 'Row 2'],
    columns=['Col A', 'Col B', 'Col C'],
)
df

Unnamed: 0,Col A,Col B,Col C
Row 1,1,2,
Row 2,4,5,6.0


In [21]:
# Build a frame from lists that already exist: each list becomes one row
list1 = [1, 2, 6]
list2 = [9, 0, 24]
col_names = ['A', 'B', 'C']
row_labels = pd.RangeIndex(start=1, stop=3)  # row labels 1 and 2
df = pd.DataFrame(data=[list1, list2], index=row_labels, columns=col_names)
df

Unnamed: 0,A,B,C
1,1,2,6
2,9,0,24


In [33]:
# Build a frame from an existing dict: keys become the column names,
# and each list supplies that column's values
dict1 = dict(A=[1, 4], B=[2, 5], C=[1, 6])
df = pd.DataFrame(data=dict1)
df

Unnamed: 0,A,B,C
0,1,2,1
1,4,5,6


In [35]:
# Load a frame from a CSV file; this file separates its fields with ';'
wine_csv = 'data/winequality-red.csv'
df = pd.read_csv(wine_csv, delimiter=';')

In [36]:
# Dimensions of the loaded frame as a (rows, columns) tuple
df.shape

(1599, 12)

In [37]:
# Per-column overview: non-null counts and dtypes, plus index range and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
fixed acidity           1599 non-null float64
volatile acidity        1599 non-null float64
citric acid             1599 non-null float64
residual sugar          1599 non-null float64
chlorides               1599 non-null float64
free sulfur dioxide     1599 non-null float64
total sulfur dioxide    1599 non-null float64
density                 1599 non-null float64
pH                      1599 non-null float64
sulphates               1599 non-null float64
alcohol                 1599 non-null float64
quality                 1599 non-null int64
dtypes: float64(11), int64(1)
memory usage: 150.0 KB


In [38]:
# Summary statistics per column: count, mean, std, min, quartiles and max
df.describe()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
count,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0,1599.0
mean,8.319637,0.527821,0.270976,2.538806,0.087467,15.874922,46.467792,0.996747,3.311113,0.658149,10.422983,5.636023
std,1.741096,0.17906,0.194801,1.409928,0.047065,10.460157,32.895324,0.001887,0.154386,0.169507,1.065668,0.807569
min,4.6,0.12,0.0,0.9,0.012,1.0,6.0,0.99007,2.74,0.33,8.4,3.0
25%,7.1,0.39,0.09,1.9,0.07,7.0,22.0,0.9956,3.21,0.55,9.5,5.0
50%,7.9,0.52,0.26,2.2,0.079,14.0,38.0,0.99675,3.31,0.62,10.2,6.0
75%,9.2,0.64,0.42,2.6,0.09,21.0,62.0,0.997835,3.4,0.73,11.1,6.0
max,15.9,1.58,1.0,15.5,0.611,72.0,289.0,1.00369,4.01,2.0,14.9,8.0


In [39]:
# Preview the first five rows
df.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [40]:
# Preview the last five rows
df.tail(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
1594,6.2,0.6,0.08,2.0,0.09,32.0,44.0,0.9949,3.45,0.58,10.5,5
1595,5.9,0.55,0.1,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.51,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5
1598,6.0,0.31,0.47,3.6,0.067,18.0,42.0,0.99549,3.39,0.66,11.0,6


In [56]:
# Normalise the column names to snake_case: spaces become underscores, letters are lower-cased.
# regex=False makes the space a literal match rather than a pattern.
df.columns = df.columns.str.replace(' ', '_', regex=False).str.lower()

In [57]:
# Address an individual column by its label; the selection is a pandas Series.
# Only the last expression of a cell is rendered, so the column is bound to a
# name once instead of being looked up in a bare statement whose result is discarded.
fixed_acidity = df['fixed_acidity']

# Summary statistics for that single column
fixed_acidity.describe()


count    1599.000000
mean        8.319637
std         1.741096
min         4.600000
25%         7.100000
50%         7.900000
75%         9.200000
max        15.900000
Name: fixed_acidity, dtype: float64

In [94]:
# Slicing with iloc: positions are integer-based and the stop position is excluded,
# so 0:1 selects only the first row.
# A bare ':' selects every column; this avoids a hard-coded upper bound that would
# silently cut off columns on a wider frame.
df.iloc[0:1, :]

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,ph,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [96]:
# Slicing with loc: selection is label-based and BOTH endpoints are included,
# so rows labelled 0 through 5 and every column from 'fixed_acidity' up to and
# including 'residual_sugar' are returned.
row_labels = slice(0, 5)
column_labels = slice('fixed_acidity', 'residual_sugar')
df.loc[row_labels, column_labels]

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar
0,7.4,0.7,0.0,1.9
1,7.8,0.88,0.0,2.6
2,7.8,0.76,0.04,2.3
3,11.2,0.28,0.56,1.9
4,7.4,0.7,0.0,1.9
5,7.4,0.66,0.0,1.8
