In [27]:
import numpy as np
import pandas as pd

data=np.array([["John",20,"Male"],["",22,"Female"],["Henry",25,"Male"],["Smith",30,"Male"],["Susan",27,"Female"],["Minendra",19,"Male"]])
print(data)

df=pd.DataFrame(data,columns=['Name','Age','Gender'])

print(df)

[['John' '20' 'Male']
 ['' '22' 'Female']
 ['Henry' '25' 'Male']
 ['Smith' '30' 'Male']
 ['Susan' '27' 'Female']
 ['Minendra' '19' 'Male']]
       Name Age  Gender
0      John  20    Male
1            22  Female
2     Henry  25    Male
3     Smith  30    Male
4     Susan  27  Female
5  Minendra  19    Male


#### Pandas DataFrame comes is a powerful tool that allows us to store and manipulate data in a structured way, 

In [28]:
df.index

RangeIndex(start=0, stop=6, step=1)

In [29]:
print(df.head(4))

    Name Age  Gender
0   John  20    Male
1         22  Female
2  Henry  25    Male
3  Smith  30    Male


In [30]:
print(df.head())

    Name Age  Gender
0   John  20    Male
1         22  Female
2  Henry  25    Male
3  Smith  30    Male
4  Susan  27  Female


In [31]:
print(df.tail(2))

       Name Age  Gender
4     Susan  27  Female
5  Minendra  19    Male


In [33]:
print(df.tail())

       Name Age  Gender
1            22  Female
2     Henry  25    Male
3     Smith  30    Male
4     Susan  27  Female
5  Minendra  19    Male


## Difference Between Series and DataFrame in Pandas

In **Pandas**, both `Series` and `DataFrame` are data structures used for data manipulation.

### Series
- A **Series** is a one-dimensional labeled array capable of holding any data type (integers, strings, floating point numbers, Python objects, etc.).
- It is similar to a column in a spreadsheet or a database table.

### DataFrame
- A **DataFrame** is a two-dimensional labeled data structure with columns of potentially different types.
- It is similar to a table in a database or an Excel spreadsheet.

In [35]:
import pandas as pd 
series = pd.Series([10,20,30,40],name="Numbers")
print(series)

print("\nType of Series: ",type(series))

0    10
1    20
2    30
3    40
Name: Numbers, dtype: int64

Type of Series:  <class 'pandas.core.series.Series'>


In [56]:
#Creating data frame
data={
    "Name":["Alice","Bob","Charlie","David"],
    "Age":[25,30,35,40]
}
df=pd.DataFrame(data)
print(df)

print("\nType of Dataframe: ",type(df))

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40

Type of Dataframe:  <class 'pandas.core.frame.DataFrame'>


In [37]:
df.index

RangeIndex(start=0, stop=6, step=1)

In [44]:
print(df.index.values)

[0 1 2 3]


In [57]:
df=df.set_index('Name')
print(df)

         Age
Name        
Alice     25
Bob       30
Charlie   35
David     40


In [46]:
df.index.values

array(['Alice', 'Bob', 'Charlie', 'David'], dtype=object)

In [47]:
df.index.value_counts

<bound method IndexOpsMixin.value_counts of Index(['Alice', 'Bob', 'Charlie', 'David'], dtype='object', name='Name')>

In [None]:
import pandas as pd 
print("Before reset_index(): ")
print(df)
df.reset_index(inplace=True)

print("\nAfter reset_index() with inplace=True: ")
print(df)

Before reset_index(): 
         Age
Name        
Alice     25
Bob       30
Charlie   35
David     40

After reset_index() with inplace=True: 
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40


In [62]:
#Set custom index 
df.index=['a','b','c','d']
print(df)

      Name  Age
a    Alice   25
b      Bob   30
c  Charlie   35
d    David   40


In [63]:
df.reset_index(drop=True,inplace=True)
print(df)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   40


In [64]:
#get index as a list 
index_lst=df.index.tolist()
print(index_lst)

[0, 1, 2, 3]


In [65]:
df["Name"]

0      Alice
1        Bob
2    Charlie
3      David
Name: Name, dtype: object

In [66]:
df["Age"]

0    25
1    30
2    35
3    40
Name: Age, dtype: int64

In [70]:
type(df[["Name"]])

pandas.core.frame.DataFrame

In [68]:
type(df["Name"])

pandas.core.series.Series

In [71]:
df[["Name"]]

Unnamed: 0,Name
0,Alice
1,Bob
2,Charlie
3,David


In [72]:
row=df.loc[1]
print(row)

Name    Bob
Age      30
Name: 1, dtype: object


In [74]:
for i in range(3):
    row=df.loc[i]
    print("\n",row)


 Name    Alice
Age        25
Name: 0, dtype: object

 Name    Bob
Age      30
Name: 1, dtype: object

 Name    Charlie
Age          35
Name: 2, dtype: object


In [76]:
rows=df.loc[::1,['Name']]
print(rows)

      Name
0    Alice
1      Bob
2  Charlie
3    David


In [79]:
rows=df.loc[1:2,['Name','Age']]
print(rows)

      Name  Age
1      Bob   30
2  Charlie   35


In [81]:
data=np.array([["John",20,"Male"],["Kim",22,"Female"],["Henry",25,"Male"],["Smith",30,"Male"],["Austin",27,"Male"]])
df=pd.DataFrame(data,columns=['Name','Age','Gender'],index=['A','B','C','D','E'])
print(df)

     Name Age  Gender
A    John  20    Male
B     Kim  22  Female
C   Henry  25    Male
D   Smith  30    Male
E  Austin  27    Male


In [82]:
df.loc["A":"D":2,["Name","Age"]]

Unnamed: 0,Name,Age
A,John,20
C,Henry,25


In [83]:
df.loc[["A","B"]]

Unnamed: 0,Name,Age,Gender
A,John,20,Male
B,Kim,22,Female
