# PANDAS - Python Packages for Data Science
pandas is a Python package for working with tabular data.

In [5]:
#### Importing libraries 
import numpy as np
import pandas as pd


In [7]:
# Build a small example DataFrame from a dictionary that maps each
# column label to the list of values for that column.
df = pd.DataFrame(
    data={
        "Item": ["Apple", "Orange", "Banana", "Watermelon"],
        "Quantity": [12, 10, 100, 3],
        "Price": [1, 2, 0.5, 9],
    }
)

In [8]:
# Display the whole DataFrame (fine here because it only has four rows)
df

Unnamed: 0,Item,Quantity,Price
0,Apple,12,1.0
1,Orange,10,2.0
2,Banana,100,0.5
3,Watermelon,3,9.0


In [9]:
# Summary of the DataFrame: index range, per-column non-null counts and dtypes, memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Item      4 non-null      object 
 1   Quantity  4 non-null      int64  
 2   Price     4 non-null      float64
dtypes: float64(1), int64(1), object(1)
memory usage: 224.0+ bytes


In [11]:
# Column labels (returned as a pandas Index)
df.columns

Index(['Item', 'Quantity', 'Price'], dtype='object')

In [12]:
# Row index: here a RangeIndex described by start, stop and step

df.index

RangeIndex(start=0, stop=4, step=1)

In [13]:
# Size: total number of cells (rows x columns)
df.size

12

In [50]:
# Right-most (last) element of a column, selected by position.
# `.iloc[-1]` is used because, with the default RangeIndex, `df['Quantity'][-1]`
# is treated as a label lookup and raises KeyError.
df['Quantity'].iloc[-1]

3

In [14]:
# Shape as a (rows, columns) tuple
df.shape

(4, 3)

In [17]:
# Data type (dtype) of each column
df.dtypes

Item         object
Quantity      int64
Price       float64
dtype: object

### Accessing elements in a DataFrame

In [18]:
# Access a column by its label (the result is a Series)
df['Item']

0         Apple
1        Orange
2        Banana
3    Watermelon
Name: Item, dtype: object

In [19]:
# Access a column by its label (the result is a Series)
df['Price']

0    1.0
1    2.0
2    0.5
3    9.0
Name: Price, dtype: float64

In [20]:
# Access a column by its label (the result is a Series)
df['Quantity']

0     12
1     10
2    100
3      3
Name: Quantity, dtype: int64

In [22]:
# Access a single cell by row label and column label.
# A single `.loc[row, column]` lookup is preferred over chaining `df['Item'][0]`.
df.loc[0, 'Item']

'Apple'

In [25]:
# Access a single cell of another column: the Price in the first row (row label 0).
# This only reads the value; nothing is removed from the DataFrame.
df.loc[0, 'Price']

1.0

In [26]:
# Access a whole row by its index label (a Series with one entry per column)
df.loc[0]

Item        Apple
Quantity       12
Price         1.0
Name: 0, dtype: object

### Selection

In [27]:
# Element-wise comparison: produces a boolean Series that is True where Price is greater than 5
df['Price'] > 5

0    False
1    False
2    False
3     True
Name: Price, dtype: bool

In [28]:
# Boolean indexing: keep only the rows that satisfy the condition, with all of their columns
df[df['Price'] > 5]

Unnamed: 0,Item,Quantity,Price
3,Watermelon,3,9.0


In [30]:
# Boolean mask: True where Quantity is below 20
df['Quantity'] < 20

0     True
1     True
2    False
3     True
Name: Quantity, dtype: bool

In [31]:
# Use the mask to keep only the rows where Quantity is below 20
df[df['Quantity'] < 20]

Unnamed: 0,Item,Quantity,Price
0,Apple,12,1.0
1,Orange,10,2.0
3,Watermelon,3,9.0


In [32]:
# Store the boolean mask (True where Price is below 5) in a variable so it can be reused
result = df['Price'] < 5
result


0     True
1     True
2     True
3    False
Name: Price, dtype: bool

In [33]:
# Apply the stored mask: show the rows for which `result` is True
df[result]

Unnamed: 0,Item,Quantity,Price
0,Apple,12,1.0
1,Orange,10,2.0
2,Banana,100,0.5


### Adding new columns

In [35]:
# Add a new column called Total, computed element-wise as Price * Quantity.
# This modifies `df` in place; re-running the cell recomputes the same values.
df['Total']  = df['Price'] * df['Quantity']
df

Unnamed: 0,Item,Quantity,Price,Total
0,Apple,12,1.0,12.0
1,Orange,10,2.0,20.0
2,Banana,100,0.5,50.0
3,Watermelon,3,9.0,27.0


## Statistics 

In [36]:
# Keep the Total column in its own variable for the statistics cells that follow
total = df['Total']

In [37]:
# Minimum and maximum, displayed together as a tuple
total.min() , total.max()

(12.0, 50.0)

In [39]:
# Standard deviation (pandas defaults to the sample estimator, ddof=1)
total.std()

16.357974609753292

In [40]:
# Variance (sample variance, ddof=1 by default)
total.var()

267.5833333333333

In [41]:
# Median (the 50% quantile)
total.median()

23.5

In [47]:
# A single quantile (here the 25th percentile)
total.quantile(.25)

18.0

In [48]:
# Several quantiles at once: pass a list and get back a Series indexed by quantile
total.quantile([0.25, 0.50,.75])

0.25    18.00
0.50    23.50
0.75    32.75
Name: Total, dtype: float64

In [49]:
# Summary statistics for every numeric column (the text column Item is left out)
df.describe()

Unnamed: 0,Quantity,Price,Total
count,4.0,4.0,4.0
mean,31.25,3.125,27.25
std,45.995471,3.966001,16.357975
min,3.0,0.5,12.0
25%,8.25,0.875,18.0
50%,11.0,1.5,23.5
75%,34.0,3.75,32.75
max,100.0,9.0,50.0


In [50]:
# Summary statistics for a single Series
total.describe()

count     4.000000
mean     27.250000
std      16.357975
min      12.000000
25%      18.000000
50%      23.500000
75%      32.750000
max      50.000000
Name: Total, dtype: float64

### Load from a csv file


In [53]:
# Load a CSV file into a DataFrame. The relative path is resolved against the
# current working directory, so 'automobile.csv' must be present there.
# NOTE: this rebinds `df`, replacing the example DataFrame built earlier in the notebook.
file = 'automobile.csv'
df = pd.read_csv(file)

### See you soon! 