# Pandas

- pandas is a software library written for the Python programming language for data manipulation and analysis. 
- It offers data structures and operations for manipulating numerical tables and time series. 
- It is free software released under the three-clause BSD license.

In [23]:
# Import libraries
import pandas as pd
import sklearn
from sklearn import datasets
# Report the installed library versions, one per line
print('The pandas version is {}.'.format(pd.__version__),
      'The scikit-learn version is {}.'.format(sklearn.__version__),
      sep='\n')

The pandas version is 1.4.4.
The scikit-learn version is 1.0.2.


## Load data

### Load as NumPy Array

In [46]:
# Load the iris dataset, then report the container type plus the type and
# shape of the feature matrix and of the target vector.
iris = datasets.load_iris()
for label, value in (
    ("Database type: ", type(iris)),
    ("Data type: ", type(iris.data)),
    ("Data dims: ", iris.data.shape),
    ("Target type: ", type(iris.target)),
    ("Target dims: ", iris.target.shape),
):
    print(label, value)

Database type:  <class 'sklearn.utils.Bunch'>
Data type:  <class 'numpy.ndarray'>
Data dims:  (150, 4)
Target type:  <class 'numpy.ndarray'>
Target dims:  (150,)


### Convert NumPy Array Data into a DataFrame

In [55]:
# Wrap the feature array in a DataFrame, labelling columns with the feature names
df = pd.DataFrame(
    iris.data,
    columns=iris.feature_names,
)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


### Rename Columns

In [56]:
# Rename all four measurement columns in a single pass that returns a new
# frame, instead of one reassigning call followed by an inplace=True call.
# errors='raise' makes a missing source column fail with KeyError rather than
# being silently ignored; with one call, such a failure leaves df entirely
# unrenamed instead of half-renamed.
# Note: because df is rebound to the renamed frame, running this cell a second
# time raises KeyError (the original column labels no longer exist).
df = df.rename(
    columns={
        "sepal length (cm)": "sepal_length",
        "sepal width (cm)": "sepal_width",
        "petal length (cm)": "petal_length",
        "petal width (cm)": "petal_width",
    },
    errors='raise',
)
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


### Load Data as a pandas DataFrame

In [57]:
# Load iris again, this time asking scikit-learn for pandas objects, and
# report the container type followed by the types of its data and target.
iris2 = datasets.load_iris(as_frame=True)
print(type(iris2))
for label, obj in (("Data type:", iris2.data), ("Target type:", iris2.target)):
    print(label, type(obj))

<class 'sklearn.utils.Bunch'>
Data type: <class 'pandas.core.frame.DataFrame'>
Target type: <class 'pandas.core.series.Series'>


### Combine DataFrames

In [71]:
# Place the target Series next to the feature columns (column-wise concat)
df2 = pd.concat(
    [iris2.data, iris2.target],
    axis="columns",
)
df2.tail()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2
149,5.9,3.0,5.1,1.8,2


### Re-Order Columns

In [72]:
# Snapshot the current column labels as a plain Python list
cols = list(df2.columns)
print('Original order\n', cols)

Original order
 ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)', 'target']


In [74]:
# Move the 'target' column (last in the original order) to the front.
# Selecting it by name, rather than rotating the list with
# cols[-1:] + cols[:-1], keeps this cell idempotent: re-running it leaves an
# already re-ordered list unchanged instead of rotating it one more position
# on every execution.
cols = [c for c in cols if c == 'target'] + [c for c in cols if c != 'target']
print('Re-ordered columns\n', cols)

Re-ordered columns
 ['target', 'sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [75]:
# Apply the new column order to the frame (same result as df2[cols])
df2 = df2.loc[:, cols]
df2.tail()

Unnamed: 0,target,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
145,2,6.7,3.0,5.2,2.3
146,2,6.3,2.5,5.0,1.9
147,2,6.5,3.0,5.2,2.0
148,2,6.2,3.4,5.4,2.3
149,2,5.9,3.0,5.1,1.8
