<a href="https://colab.research.google.com/github/pankajkumaryadav-ai-ds/AI-DS-LAB/blob/main/PANDAS_WEEK(4).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# a. Creating dataframe
import pandas as pd

# First ingredient: a daily DatetimeIndex of six consecutive days starting on
# 2013-01-01. An index like this can later be passed as `index=` when a
# DataFrame is built from a NumPy array with labeled columns.
dates = pd.date_range(start="20130101", periods=6, freq="D")
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# Build a DataFrame from a dict: each key becomes a column, and each value is
# either an array-like of length 4 or a scalar that is repeated on every row.
import numpy as np

df2_columns = {
    "A": 1.0,                                                  # scalar float
    "B": pd.Timestamp("20130102"),                             # scalar timestamp
    "C": pd.Series(1, index=list(range(4)), dtype="float32"),  # supplies the row index
    "D": np.full(4, 3, dtype="int32"),                         # four 3s as int32
    "E": pd.Categorical(["test", "train"] * 2),                # test/train alternating
    "F": "foo",                                                # scalar string
}
df2 = pd.DataFrame(df2_columns)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [6]:
# Inspect the per-column dtypes: each column of df2 keeps its own dtype, so one
# DataFrame can hold floats, datetimes, integers, categoricals and strings together.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [8]:
# b. concat()
# Stack two Series end to end. Each input keeps its own labels, so the
# labels 0 and 1 each appear twice in the result.
s1 = pd.Series(list("ab"))
s2 = pd.Series(list("cd"))
pd.concat(objs=[s1, s2])

0    a
1    b
0    c
1    d
dtype: object

In [9]:
# ignore_index=True discards the labels of the inputs and numbers the
# combined result 0..n-1 instead.
pd.concat(
    objs=[s1, s2],
    ignore_index=True,
)

0    a
1    b
2    c
3    d
dtype: object

In [10]:
# `keys` adds an outer index level that records which input each row came from;
# `names` gives labels to the two resulting index levels.
pd.concat(
    objs=[s1, s2],
    keys=["s1", "s2"],
    names=["Series name", "Row ID"],
)

Series name  Row ID
s1           0         a
             1         b
s2           0         c
             1         d
dtype: object

In [11]:
# c. Setting conditions
from pandas import DataFrame

numbers = {'mynumbers': [51, 52, 53, 54, 55]}
df = DataFrame(numbers, columns=['mynumbers'])

# Evaluate the condition once and reuse the boolean mask for both branches.
# Note that the column stores the *strings* 'True' / 'False', not booleans.
at_most_53 = df['mynumbers'] <= 53
df.loc[at_most_53, '<= 53'] = 'True'
df.loc[~at_most_53, '<= 53'] = 'False'

df

Unnamed: 0,mynumbers,<= 53
0,51,True
1,52,True
2,53,True
3,54,False
4,55,False


In [12]:
from pandas import DataFrame

numbers = {'mynumbers': [51, 52, 53, 54, 55]}
df = DataFrame(numbers, columns=['mynumbers'])


def label_at_most_53(value):
    """Return the string 'True' when value <= 53, otherwise the string 'False'."""
    if value <= 53:
        return 'True'
    return 'False'


# Same result as the .loc version, but computed element by element with apply().
df['<= 53'] = df['mynumbers'].apply(label_at_most_53)

print(df)


   mynumbers  <= 53
0         51   True
1         52   True
2         53   True
3         54  False
4         55  False


In [13]:
from pandas import DataFrame

names = {'First_name': ['Hanah', 'Ria', 'Jay', 'Bholu', 'Sachin']}
df = DataFrame(names, columns=['First_name'])

# Condition on a string column: rows equal to 'Ria' are marked 'Found',
# and the negated mask marks every other row 'Not Found'.
is_ria = df['First_name'] == 'Ria'
df.loc[is_ria, 'Status'] = 'Found'
df.loc[~is_ria, 'Status'] = 'Not Found'

print(df)


  First_name     Status
0      Hanah  Not Found
1        Ria      Found
2        Jay  Not Found
3      Bholu  Not Found
4     Sachin  Not Found


In [14]:
from pandas import DataFrame

names = {'First_name': ['Hanah', 'Ria', 'Jay', 'Bholu', 'Sachin']}
df = DataFrame(names, columns=['First_name'])

# Combined condition: a row is 'Found' when the name is 'Ria' OR 'Jay'.
# isin() expresses the OR, and its negation covers "neither 'Ria' nor 'Jay'".
is_wanted = df['First_name'].isin(['Ria', 'Jay'])
df.loc[is_wanted, 'Status'] = 'Found'
df.loc[~is_wanted, 'Status'] = 'Not Found'

print(df)


  First_name     Status
0      Hanah  Not Found
1        Ria      Found
2        Jay      Found
3      Bholu  Not Found
4     Sachin  Not Found


In [15]:
# d. Adding a new column
import numpy as np
import pandas as pd

# Start from a small two-column frame.
df = pd.DataFrame(dict(A=[1, 2, 3, 4], B=[5, 6, 7, 8]))

# Plain item assignment with a list of matching length appends the new
# column at the right-hand edge of the frame.
df["C"] = [10, 20, 30, 40]
df

Unnamed: 0,A,B,C
0,1,5,10
1,2,6,20
2,3,7,30
3,4,8,40


In [16]:
# Add several columns in one assignment: a 2-D integer array with one array
# column per new label. A seeded generator makes the values identical on every
# Restart & Run All, and the row count is taken from the frame instead of a
# hard-coded 4 so the cell keeps working if the frame above changes size.
new_columns = ["1of3", "2of3", "3of3"]
rng = np.random.default_rng(seed=0)
df[new_columns] = rng.integers(10, size=(len(df), len(new_columns)))
df

Unnamed: 0,A,B,C,1of3,2of3,3of3
0,1,5,10,8,4,5
1,2,6,20,0,1,6
2,3,7,30,7,2,4
3,4,8,40,8,2,7


In [17]:
# Remove the three demo columns again. drop() returns a new frame, which is
# rebound to `df` here; avoiding inplace=True keeps the operation chainable and
# leaves any frame object displayed by an earlier cell unmodified.
df = df.drop(columns=["1of3", "2of3", "3of3"])
df

Unnamed: 0,A,B,C
0,1,5,10
1,2,6,20
2,3,7,30
3,4,8,40


In [18]:
# insert() places a new column at an explicit position (1, i.e. right after the
# first column) and fills every row with the scalar 5. It modifies df in place;
# re-running the cell raises ValueError because column "D" already exists.
df.insert(loc=1, column="D", value=5)
df

Unnamed: 0,A,D,B,C
0,1,5,5,10
1,2,5,6,20
2,3,5,7,30
3,4,5,8,40


In [19]:
# .loc with a full row slice and a new column label also creates a column;
# the list supplies one value per row.
df.loc[:, "E"] = ["a", "b", "c", "d"]
df

Unnamed: 0,A,D,B,C,E
0,1,5,5,10,a
1,2,5,6,20,b
2,3,5,7,30,c
3,4,5,8,40,d


In [20]:
# assign() returns a new frame with the extra column F (ten times column C);
# the callable receives the frame being assigned to.
df = df.assign(F=lambda frame: frame["C"] * 10)
df

Unnamed: 0,A,D,B,C,E,F
0,1,5,5,10,a,100
1,2,5,6,20,b,200
2,3,5,7,30,c,300
3,4,5,8,40,d,400
