In [2]:
import numpy as np
import pandas as pd

In [3]:
# Build pandas Series objects from plain Python lists.

my_index = ['USA', 'Canada', 'England']
my_data = [1776, 1867, 1821]

# Without an explicit index, pandas labels the values 0..n-1.
my_ser_numeric = pd.Series(my_data)
print("Series with Numeric Index:", my_ser_numeric, sep="\n")

# Passing `index` attaches one label from my_index to each value.
my_ser_named = pd.Series(my_data, index=my_index)
print("\nSeries with Named Index:", my_ser_named, sep="\n")

Series with Numeric Index:
0    1776
1    1867
2    1821
dtype: int64

Series with Named Index:
USA        1776
Canada     1867
England    1821
dtype: int64


In [None]:
# A Series with a custom (named) index built from plain lists
mydata = [10, 20, 30, 40]
myindex = ['a', 'b', 'c', 'd']
myser = pd.Series(data=mydata, index=myindex)
print(myser)

# Creating a Series from a NumPy array
# Seed NumPy's global RNG first so the "random" values are identical on every
# Restart & Run All; unseeded, this cell printed different numbers each run.
np.random.seed(42)
# Four random integers drawn from the half-open range [0, 100)
ran_data = np.random.randint(0, 100, 4)
print(ran_data)

# Positional arguments are (data, index): each name labels one random value
names = ['Alice', 'Bob', 'Charles', 'Dave']
ages = pd.Series(ran_data, names)
print(ages)


a    10
b    20
c    30
d    40
dtype: int64
[92 38 89 85]
Alice      92
Bob        38
Charles    89
Dave       85
dtype: int64


In [None]:
# Creating a Series from a dictionary: keys become the index, values the data
ages = dict(Alice=25, Bob=30, Charles=35, Dave=40)
print(ages)

ages_series = pd.Series(data=ages)
print(ages_series)

{'Alice': 25, 'Bob': 30, 'Charles': 35, 'Dave': 40}
Alice      25
Bob        30
Charles    35
Dave       40
dtype: int64


In [6]:
# Using a named index
# Imagine sales data for the 1st and 2nd quarters of a global company
# (Japan appears only in q1 and Brazil only in q2).
q1 = dict(Japan=80, China=450, India=200, USA=250)
q2 = dict(Brazil=100, China=500, India=210, USA=260)

# Convert each dictionary into a pandas Series keyed by country
sales_q1, sales_q2 = pd.Series(q1), pd.Series(q2)

print(sales_q1, sales_q2, sep="\n")


Japan     80
China    450
India    200
USA      250
dtype: int64
Brazil    100
China     500
India     210
USA       260
dtype: int64


In [10]:
# Look a value up by its index label
japan_by_label = sales_q1['Japan']
print('Sales of japan in quater 1:', japan_by_label)

# Integer-based (positional) lookup is still available through .iloc
japan_by_position = sales_q1.iloc[0]
print('Sales of japan in quater 1:', japan_by_position)


Sales of japan in quater 1: 80
Sales of japan in quater 1: 80


In [None]:

# Be careful with potential errors: a label lookup must match the index exactly.
# Each key below is absent from sales_q1 for a different reason.
bad_labels = (
    'Argentina',  # wrong name
    'Japan ',     # accidental trailing space
    'japan',      # wrong letter case
)
for label in bad_labels:
    try:
        print(sales_q1[label])
    except KeyError as e:
        print(f"Error: {e}")

Error: 'Argentina'
Error: 'Japan '
Error: 'japan'


In [None]:
# Grab just the index labels (the "keys") of the Series
q1_labels = sales_q1.keys()
print(q1_labels)

Index(['Japan', 'China', 'India', 'USA'], dtype='object')


In [None]:
# Arithmetic with a scalar is broadcast across every element of the Series
doubled_q1 = sales_q1 * 2
print(doubled_q1)

# Dividing by a scalar is applied element-wise in the same way
q1_in_hundreds = sales_q1 / 100
print(q1_in_hundreds)

Japan    160
China    900
India    400
USA      500
dtype: int64
Japan    0.8
China    4.5
India    2.0
USA      2.5
dtype: float64


In [None]:
# Adding two Series aligns them on index labels; a label present in only one
# of them (Japan, Brazil) comes out as NaN.
combined_sales = sales_q1 + sales_q2
print(combined_sales)

# .add() with fill_value substitutes that value for the missing side,
# so the unmatched labels keep their own number instead of NaN.
combined_filled = sales_q1.add(sales_q2, fill_value=0)
print(combined_filled)

Brazil      NaN
China     950.0
India     410.0
Japan       NaN
USA       510.0
dtype: float64
Brazil    100.0
China     950.0
India     410.0
Japan      80.0
USA       510.0
dtype: float64


In [None]:
# Fix the seed so the random matrix below is the same on every run
np.random.seed(101)
# 4x3 array of integers drawn from [0, 101), i.e. 0 through 100 inclusive
mydata = np.random.randint(low=0, high=101, size=(4, 3))
print(mydata)

[[95 11 81]
 [70 63 87]
 [75  9 77]
 [40  4 63]]


In [None]:
# Row labels and column labels used by the DataFrame examples
myindex = 'CA NY AZ TX'.split()
mycolumns = 'Jan Feb Mar'.split()


### DataFrame
  - without any index or column labels
  - with a labeled index
  - with a labeled index and labeled columns

In [None]:
# No labels supplied: rows and columns both get default integer labels
df = pd.DataFrame(mydata)
print(df)

    0   1   2
0  95  11  81
1  70  63  87
2  75   9  77
3  40   4  63


In [None]:
# Row labels supplied via `index`; columns keep their default integer labels
df = pd.DataFrame(mydata, index=myindex)
print(df)

     0   1   2
CA  95  11  81
NY  70  63  87
AZ  75   9  77
TX  40   4  63


In [None]:
# Both row labels (`index`) and column labels (`columns`) are supplied
df = pd.DataFrame(data=mydata, index=myindex, columns=mycolumns)
print(df)

# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() emitted a stray "None" line after the summary.
df.info()

    Jan  Feb  Mar
CA   95   11   81
NY   70   63   87
AZ   75    9   77
TX   40    4   63
<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, CA to TX
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Jan     4 non-null      int64
 1   Feb     4 non-null      int64
 2   Mar     4 non-null      int64
dtypes: int64(3)
memory usage: 128.0+ bytes
None
