## Series

In [1]:
import numpy as np
import pandas as pd

In [2]:
# A Series may hold values of different Python types; when it does,
# pandas stores them with the generic `object` dtype.
mixed_values = [1, "cat", 10.2, "dog"]
my_series = pd.Series(mixed_values)
my_series

0       1
1     cat
2    10.2
3     dog
dtype: object

In [3]:
my_series[1]  # label lookup: 1 is a label in the default integer index (0..3)

'cat'

In [4]:
# Build a labelled Series: the dict keys become the index labels and the
# dict values become the data (insertion order is preserved).
ages = pd.Series({"John": 20, "Allen": 53, "Mary": 68})
ages

John     20
Allen    53
Mary     68
dtype: int64

In [5]:
ages["John"]  # look up a value by its index label

20

In [6]:
ages["Mary"]

68

# DataFrame

## Creating a DataFrame: `pd.DataFrame({'label1': [col1], 'label2': [col2], ...})`

In [7]:
# Each dict key is a column label and each list holds that column's values,
# one entry per row (all lists have the same length).
user_columns = {
    "user": [1, 2, 3],
    "age": [24, 54, 17],
    "sex": ["F", "F", "M"],
    "occupation": ["technician", "musician", "student"],
}
df = pd.DataFrame(user_columns)

In [8]:
df

Unnamed: 0,user,age,sex,occupation
0,1,24,F,technician
1,2,54,F,musician
2,3,17,M,student


In [9]:
display(df)

Unnamed: 0,user,age,sex,occupation
0,1,24,F,technician
1,2,54,F,musician
2,3,17,M,student


In [10]:
df.set_index("user")  # returns a NEW DataFrame indexed by "user"; df itself is not modified

Unnamed: 0_level_0,age,sex,occupation
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,24,F,technician
2,54,F,musician
3,17,M,student


In [11]:
display(df) # df itself does not change

Unnamed: 0,user,age,sex,occupation
0,1,24,F,technician
1,2,54,F,musician
2,3,17,M,student


In [12]:
# Pass inplace=True to modify the original DataFrame instead of returning a new one.
# NOTE: this cell is not re-runnable as-is: once "user" has become the index it is
# no longer a column, so a second run raises KeyError.

df.set_index("user", inplace=True)

In [13]:
df # now df is changed

Unnamed: 0_level_0,age,sex,occupation
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,24,F,technician
2,54,F,musician
3,17,M,student


## Summarizing data

In [14]:
df.head()  # first 5 rows by default; df has only 3 rows, so all of them are shown

Unnamed: 0_level_0,age,sex,occupation
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,24,F,technician
2,54,F,musician
3,17,M,student


In [15]:
df.tail()  # last 5 rows by default; df has only 3 rows, so all of them are shown

Unnamed: 0_level_0,age,sex,occupation
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,24,F,technician
2,54,F,musician
3,17,M,student


In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3 entries, 1 to 3
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   age         3 non-null      int64 
 1   sex         3 non-null      object
 2   occupation  3 non-null      object
dtypes: int64(1), object(2)
memory usage: 96.0+ bytes


In [17]:
df.index

Int64Index([1, 2, 3], dtype='int64', name='user')

In [19]:
df.columns

Index(['age', 'sex', 'occupation'], dtype='object')

In [20]:
list(df.columns)

['age', 'sex', 'occupation']

In [21]:
df.shape  # a (number_of_rows, number_of_columns) tuple

(3, 3)

In [22]:
df.shape[0] # Number of rows

3

In [23]:
df.shape[1] # Number of columns

3

In [24]:
df.dtypes  # data type of each column in the DataFrame

age            int64
sex           object
occupation    object
dtype: object

In [25]:
df.describe()  # summary statistics; by default only numeric columns are included

Unnamed: 0,age
count,3.0
mean,31.666667
std,19.655364
min,17.0
25%,20.5
50%,24.0
75%,39.0
max,54.0


In [27]:
df.describe(include='all')  # statistics for all columns regardless of dtype; entries that do not apply to a column are NaN

Unnamed: 0,age,sex,occupation
count,3.0,3,3
unique,,2,3
top,,F,technician
freq,,2,1
mean,31.666667,,
std,19.655364,,
min,17.0,,
25%,20.5,,
50%,24.0,,
75%,39.0,,


## Selecting a column

In [28]:
df["occupation"]  # selecting a single column returns a Series (each DataFrame column is a Series)

user
1    technician
2      musician
3       student
Name: occupation, dtype: object

## Converting a Series to a DataFrame

In [29]:
df["occupation"].to_frame()  # to_frame() converts the Series into a one-column DataFrame

Unnamed: 0_level_0,occupation
user,Unnamed: 1_level_1
1,technician
2,musician
3,student


In [30]:
ages  # a Series

John     20
Allen    53
Mary     68
dtype: int64

In [31]:
# By default the new column takes the Series' name; `ages` is unnamed, so the
# default column label would be 0. Pass name= to choose the label explicitly.

ages.to_frame(name="age")

Unnamed: 0,age
John,20
Allen,53
Mary,68


## Importing data from file