In [None]:
import pandas as pd


# 01- How to find version

In [None]:
# Show the installed pandas version string
pd.__version__

In [None]:
# Another way to show the pandas version: print a report covering pandas
# and the versions of its dependencies
pd.show_versions()

 
# 02- Make a DataFrame

In [None]:
# Build a small two-column DataFrame from a dict of column label -> values
df = pd.DataFrame(
    data={
        "Col A": [1, 2, 3],
        "Col B": [4, 5, 6],
    }
)
df

In [None]:
# Convert a 2-D NumPy array into a DataFrame
import numpy as np

array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
# Only the last expression of a cell is rendered, so a bare `array` in the
# middle of the cell shows nothing; print it explicitly instead.
print(array)

# No labels are supplied, so rows and columns get the default integer labels
df = pd.DataFrame(array)
df

In [None]:
# 4 x 8 frame of random floats drawn from [0, 1), with columns labelled A..H
df = pd.DataFrame(
    data=np.random.rand(4, 8),
    columns=list("ABCDEFGH"),
)
df

# 03- How to rename columns?

In [None]:
# 4 x 8 frame of random integers from 1 to 10 (upper bound 11 is exclusive),
# with explicit row labels R1..R4 and column labels A..H
df = pd.DataFrame(
    data=np.random.randint(low=1, high=11, size=(4, 8)),
    index=["R1", "R2", "R3", "R4"],
    columns=list("ABCDEFGH"),
)
df

In [None]:
# 3 x 3 frame of random integers from 1 to 20 with default integer labels;
# used below as the starting point for the renaming examples
df = pd.DataFrame(
    data=np.random.randint(low=1, high=21, size=(3, 3)),
)
df



In [None]:
# Rename all columns at once by assigning a new list of labels
# (the list length must match the number of columns)
df.columns=['col1', 'col2','col3']
df

In [None]:
# Rename all rows at once by assigning a new list of index labels
df.index=["Row-1", "Row-2", "Row-3"]
df

In [None]:
# Replace a character or substring in every column label (here "-" -> "_").
# If the cells above were run in order, the labels are col1..col3 and contain
# no "-", so this particular call leaves them unchanged.
df.columns = df.columns.str.replace("-","_")

df

In [None]:
# Add the prefix "A_" to every column label (add_prefix returns a new frame,
# which is bound back to `df`)
df = df.add_prefix("A_")
df

In [None]:
# Add the suffix "_X" to every column label (add_suffix returns a new frame,
# which is bound back to `df`)
df = df.add_suffix("_X")
df

In [None]:
# Overwrite the prefixed/suffixed labels with plain hyphenated names
df.columns=["col-1", "col-2", 'col-3']
df

# 04- Using template data

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

# Load seaborn's "titanic" example dataset and preview the first five rows
df = sns.load_dataset("titanic")
df.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); with the default
# arguments only the numeric columns are summarised
df.describe()

In [None]:
# Column labels of the DataFrame
df.columns

In [None]:
# Row index of the DataFrame (this shows the index object, not a row count;
# len(df) gives the number of rows)
df.index

In [None]:
# Save the DataFrame to CSV.
# index=False keeps the row index out of the file; otherwise reading it back
# with a plain pd.read_csv adds a spurious "Unnamed: 0" column.
df.to_csv("zyx.csv", index=False)

In [None]:
# Export the DataFrame to an Excel workbook (requires an Excel writer engine
# such as openpyxl to be installed), then preview the first three rows
df.to_excel("xyz.xlsx")
df.head(3)

# 05- Using your own data  

In [None]:
# Read the CSV file "zyx.csv" into a DataFrame (this is a CSV read, not Excel)
df = pd.read_csv("zyx.csv")
df

# 06- Reverse rows order

In [None]:
import pandas as pd
import seaborn as sns

In [None]:
# Reload the Titanic dataset so this section starts from the original frame
df = sns.load_dataset("titanic")
df.head(3)

In [None]:
# Reverse the row order with a step of -1; the original index labels are kept,
# so the preview starts at the last label
df.loc[::-1].head()

In [None]:
# Reverse the rows and renumber them from 0; drop=True discards the old index
# instead of keeping it as a new column
df.loc[::-1].reset_index(drop=True).head()

# 07- Reverse columns order

In [None]:
# Keep every row (:) and reverse the column order (::-1)
df.loc[:, ::-1].head(3)


# 08- Select columns by data type

In [None]:
# Select only numeric columns ("number" covers both integer and float dtypes)
df.select_dtypes(include=["number"]).head()

In [None]:
# Select only categorical columns
df.select_dtypes(include=['category']).head()

In [None]:
# Select only object-dtype columns (typically strings)
df.select_dtypes(include='object').head()

In [None]:
# Select only float columns.
# NOTE(review): 'Float64' (capital F) is the name of pandas' nullable float
# dtype; confirm it selects the intended columns on the pandas version in use,
# or use 'float64' for the NumPy dtype.
df.select_dtypes(include='Float64').head()

In [None]:
# Select numeric and categorical columns together
df.select_dtypes(include=['number', 'category']).head(3)

In [None]:
# Select all columns except numeric ones.
# This keeps every non-numeric column such as object, category, etc.
df.select_dtypes(exclude=['number']).head()

In [None]:
# Select all columns except categorical ones.
# This keeps every non-category column such as object, number, etc.
df.select_dtypes(exclude=['category']).head()

# 09- Convert numeric into string


In [None]:
# Two integer columns; inspect their dtypes before any conversion
df = pd.DataFrame(
    data={
        "col_A": [1, 2, 3, 4, 5],
        "col_B": [6, 7, 8, 9, 10],
    }
)
df.dtypes

In [None]:
# Convert both 'col_A' and 'col_B' to string type and show the resulting dtypes.
# The converted frame is not assigned, so `df` itself keeps its original dtypes.
df.astype({'col_A': 'str', "col_B": 'str'}).dtypes

In [None]:
# Convert column 'col_A' to a numeric type. errors="coerce" replaces any value
# that cannot be parsed as a number with NaN instead of raising an error.
pd.to_numeric(df['col_A'],errors="coerce")

# 10- Reduce DataFrame size

In [None]:
# Reduce the DataFrame to a random sample of 10% of its rows
df = sns.load_dataset('titanic')

# Keep the sample in its own variable so the full dataset stays available, and
# seed the draw so that re-running the cell yields the same rows.
df_sample = df.sample(frac=0.1, random_state=1)

# Only the last expression of a cell is rendered, so print the shapes
# explicitly, then report on the reduced frame (row count and memory usage).
print(df.shape, "->", df_sample.shape)
df_sample.info()

# 11- Copy DataFrame from clipboard

In [None]:
import pandas as pd
import seaborn as sns

# Load the Titanic dataset and write it to an Excel file without the row index.
# NOTE(review): presumably this file is meant to be opened and copied by hand
# for the clipboard example in the next cell — confirm.
df = sns.load_dataset('titanic')
df.to_excel("titanic_sample.xlsx", index=False)

In [None]:
# Build a DataFrame from whatever tabular text is currently on the system
# clipboard. The result depends on what was copied before running this cell,
# so it is not reproducible from code alone.
df = pd.read_clipboard()
df

# 12- Split dataframe in two subsets

In [None]:
import pandas as pd
import seaborn as sns

# Reload the Titanic dataset as the starting point for the split example
df = sns.load_dataset('titanic')



In [None]:
# NOTE(review): `random` is not used in any of the visible cells; the sampling
# below is seeded through `random_state` — confirm whether this import is needed.
from random import random
# Draw a random 50% of the rows; random_state=1 makes the draw repeatable
df_1 = df.sample(frac=0.50, random_state=1 )
df_1.shape

In [None]:
# The second subset is every row whose index label was not sampled into df_1
df_2 = df.drop(df_1.index)
df_2.shape

In [None]:
# Preview the first four rows of the first subset
df_1.head(4)

In [None]:
# Preview the first four rows of the second subset
df_2.head(4)

In [None]:
# (rows, columns) of the first subset
df_1.shape

In [None]:
# (rows, columns) of the second subset
df_2.shape

In [None]:
# Combined row count of the two subsets
len(df_2) + len(df_1)

# 13- Concat two datasets in one dataframe

In [None]:
# Stack the two subsets vertically (df_1 rows first, then df_2 rows); the
# original index labels are kept because ignore_index is left at its default
df = pd.concat([df_1, df_2])
df.shape

# 14- Filtering a dataframe

In [None]:
# Preview the frame before filtering
df.head()

In [None]:
# Distinct values in the 'sex' column
df.sex.unique()

In [None]:
# Keep only rows where sex is "male"
df[(df.sex=="male")]

In [None]:
# Distinct values in the 'who' column
df.who.unique()

In [None]:
# Combine two conditions with & (each wrapped in parentheses):
# rows where who is 'child' and age is above 14
df[(df.who=='child') & (df.age>14)]

In [None]:
# Shape of the subset whose 'alive' value is 'yes'
df[(df.alive=='yes')].shape

In [None]:
# Shape of the subset whose 'alive' value is 'no'
df[(df.alive=='no')].shape

In [None]:
# The filters above were not assigned, so `df` is unchanged
df.head()

In [None]:
# Rows for passengers who embarked at Southampton, or who embarked at
# Queenstown and were travelling alone.
# `alone` is a boolean column, so it is used directly as a mask; comparing it
# with the string "True" is False for every row and silently drops the
# Queenstown branch.
df[
    (df.embark_town == 'Southampton') |
    ((df.embark_town == 'Queenstown') & df.alone)
]


In [None]:
# Rows with age of at least 18, sex "female" and alive "yes";
# all three conditions must hold
filter_df = df[
    df.age.ge(18)
    & df.sex.eq("female")
    & df.alive.eq("yes")
]

In [None]:
# Keep only the three columns that were used in the filter and display them
result = filter_df[['age', 'sex', 'alive']]
result


# 15- Filter by large category

In [None]:
# Number of rows per distinct value of 'sex'
df.sex.value_counts()

In [None]:
# Number of rows per distinct value of 'alive'
df.alive.value_counts()

In [None]:
# Number of rows per distinct value of 'alone'
df.alone.value_counts()

In [None]:
# The four most frequent ages, with their counts
df.age.value_counts().nlargest(4)

In [None]:
# The ages with the four smallest counts (.index returns the age values
# themselves rather than the counts)
df.age.value_counts().nsmallest(4).index

# 16- Splitting a string into multiple columns

In [272]:
# Sample data: a full name and a two-word address per row
df = pd.DataFrame(
    data={
        "name": ["Ali Raza", "Junaid Maseed", "Ajmal Afridi"],
        "address": ["palosa Peshawar", "warirstan england", "Hassen khel"],
    }
)

# Split each name on the space; expand=True returns one column per piece,
# which are stored as two new columns
df[["First_name", "Last_name"]] = df["name"].str.split(" ", expand=True)

In [273]:
# Display the frame, now including the First_name and Last_name columns
df

Unnamed: 0,name,address,First_name,Last_name
0,Ali Raza,palosa Peshawar,Ali,Raza
1,Junaid Maseed,warirstan england,Junaid,Maseed
2,Ajmal Afridi,Hassen khel,Ajmal,Afridi


In [277]:
# Split each address on the space into two pieces. `new_df` is the raw split
# result (its columns are labelled 0 and 1); the same values are also stored
# in `df` under the names Village and State.
new_df = df["address"].str.split(" ", expand=True)
df[["Village", "State"]] = new_df

In [278]:
# Display the raw split result (columns labelled 0 and 1)
new_df

Unnamed: 0,0,1
0,palosa,Peshawar
1,warirstan,england
2,Hassen,khel


In [279]:
# Display the frame, now including the Village and State columns
df

Unnamed: 0,name,address,First_name,Last_name,Village,State
0,Ali Raza,palosa Peshawar,Ali,Raza,palosa,Peshawar
1,Junaid Maseed,warirstan england,Junaid,Maseed,warirstan,england
2,Ajmal Afridi,Hassen khel,Ajmal,Afridi,Hassen,khel
