### Imports

In [1]:
# One-time setup: uncomment the next line if pandas is not installed in this kernel's environment.
# %pip install pandas

In [2]:
import pandas as pd
import numpy as np

At the core of the pandas module, there are two important data structures: Series and DataFrame.

### Series
A Series is a one-dimensional, array-like object that holds a sequence of values together with an associated sequence of labels, called the index. All values in a Series share a single data type, much like a homogeneous NumPy array. Unlike a Python list, whose positions are always integers, a Series index is not limited to integers: its labels can be strings or other types.

In [3]:
# Build a Series from a list; with no index supplied, the values are labelled 0, 1, 2.
ser = pd.Series(data=list('abc'))
ser

0    a
1    b
2    c
dtype: object

In [4]:
# A tuple is accepted as input as well and gives the same values and default labels.
ser1 = pd.Series(data=tuple('abc'))
ser1

0    a
1    b
2    c
dtype: object

In [5]:
# Materialize the default index labels of ser1 as a plain Python list.
ser1.index.tolist()

[0, 1, 2]

In [6]:
# Build a Series with explicit labels; the labels may mix types (two strings and an int here).
ser2 = pd.Series(data=['a', 'b', 'c'], index=['x', 'y', 4])
ser2

x    a
y    b
4    c
dtype: object

In [7]:
# Show the index (the labels) of ser2.
ser2.index

Index(['x', 'y', 4], dtype='object')

In [8]:
# Build a Series from a dictionary: the keys become the index labels, the values the data.
dict_data = dict(George=35, Kevin=20, Michael=26, Jose=50)
ser3 = pd.Series(data=dict_data)
ser3

George     35
Kevin      20
Michael    26
Jose       50
dtype: int64

In [9]:
# Show the index of ser3; its labels are the keys of the source dictionary.
ser3.index

Index(['George', 'Kevin', 'Michael', 'Jose'], dtype='object')

In [10]:
# Build a Series with an explicit dtype (32-bit integers) and string labels.
# Note: this rebinds the name ser2, replacing the Series created earlier.
ser2 = pd.Series(data=[1, 200, 30], index=list('123'), dtype=np.int32)
ser2

1      1
2    200
3     30
dtype: int32

In [11]:
# Check which class the index object of ser2 is an instance of.
type(ser2.index)

pandas.core.indexes.base.Index

### DataFrame
A pandas DataFrame is a two-dimensional data structure with labeled rows and columns. It is similar to a Google Sheets or Excel spreadsheet with more than one column.

In [12]:
# Build a DataFrame from nested lists: each inner list becomes one row.
df = pd.DataFrame(
    data=[
        [11, 20, 23],
        [4, 50, 6],
        [70, 18, 90],
    ]
)
df

Unnamed: 0,0,1,2
0,11,20,23
1,4,50,6
2,70,18,90


In [13]:
# Same data as before, now with explicit column names and row labels.
df = pd.DataFrame(
    data=[[11, 20, 23], [4, 50, 6], [70, 18, 90]],
    index=['i1', 'i2', 'i3'],
    columns=['A', 'B', 'C'],
)
df

Unnamed: 0,A,B,C
i1,11,20,23
i2,4,50,6
i3,70,18,90


In [14]:
import pandas as pd

# Build a DataFrame from a dict of equal-length lists: each key becomes a column.
data = dict(
    name=['George', 'Kevin', 'Michael', 'Jose'],
    age=[35, 20, 26, 35],
    height=[5.5, 4.9, 5, 5.4],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,name,age,height
0,George,35,5.5
1,Kevin,20,4.9
2,Michael,26,5.0
3,Jose,35,5.4


In [15]:
import pandas as pd

# A dict of one-element lists produces a DataFrame with a single row.
data = dict(name=['George'], age=[35], height=[5.4])
df = pd.DataFrame(data=data)
df

Unnamed: 0,name,age,height
0,George,35,5.4


In [16]:
# Build a DataFrame from tuples (one per row), forcing every column to float64.
df1 = pd.DataFrame(
    data=[(10, 22, 30), (4, 15, 16), (17, 80, 29)],
    columns=list('ABC'),
    dtype=np.float64,
)
df1


Unnamed: 0,A,B,C
0,10.0,22.0,30.0
1,4.0,15.0,16.0
2,17.0,80.0,29.0


In [17]:
# Convert column B to 64-bit integers; columns A and C are left untouched.
df1['B'] = df1['B'].astype(dtype=np.int64)

In [18]:
# Display df1 again: column B now holds integers while A and C are still floats.
df1

Unnamed: 0,A,B,C
0,10.0,22,30.0
1,4.0,15,16.0
2,17.0,80,29.0


In [19]:
# Look up the dtype of column B in the frame's per-column dtype table.
df1.dtypes['B']

dtype('int64')

### Reading and Writing Data

In [20]:
import numpy as np
import pandas as pd

[`pandas.read_csv` documentation](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html)

[Iris Dataset](https://drive.google.com/file/d/1Aj55LWNHUOv4OCS4jXVHTq9m25Cvqp-a/view?usp=share_link)

In [21]:
# Load the Iris CSV; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='data/iris.csv')
df

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Virginica
146,6.3,2.5,5.0,1.9,Virginica
147,6.5,3.0,5.2,2.0,Virginica
148,6.2,3.4,5.4,2.3,Virginica


Exporting data

In [22]:
from pathlib import Path

import pandas as pd

# Small hand-built table used by the export examples that follow.
data = {'name': ['George', 'Kevin', 'Michael', 'Jose'],
        'age': [35, 20, 26, 30],
        'height': [5.5, 4.9, 6, 5.4]}
df = pd.DataFrame(data)

# to_csv does not create missing directories, so make sure the target folder exists first.
Path('data').mkdir(parents=True, exist_ok=True)

# index=False leaves the row labels out of the file. It is the documented boolean form
# (index=None only works because it is falsy) and matches the other export cells.
df.to_csv('data/out.csv', index=False)

In [27]:
# Write the same table using '-' instead of a comma as the field separator.
# index=False is the documented boolean way to omit the row labels (index=None only
# works because it is falsy) and keeps this call consistent with the other exports.
df.to_csv("data/out2.csv", sep='-', index=False)

In [28]:
# Read the file back, telling the parser that fields are separated by '-'.
df2 = pd.read_csv(filepath_or_buffer="data/out2.csv", sep="-")

In [29]:
# Export only a subset of the columns, in the order listed, without the row labels.
df.to_csv(path_or_buf="data/out3.csv", columns=['name', 'age'], index=False)

In [30]:
# Preview the first rows of df (at most five).
df.head(n=5)

Unnamed: 0,name,age,height
0,George,35,5.5
1,Kevin,20,4.9
2,Michael,26,6.0
3,Jose,30,5.4


In [36]:
# Turn the single 'age' column (a Series) into a one-column DataFrame.
my_var = df['age'].to_frame()

In [37]:
# Confirm that my_var is a DataFrame rather than a Series.
type(my_var)

pandas.core.frame.DataFrame

Exporting Series

In [38]:
import pandas as pd

# Build a four-element Series whose labels are the integers 1 through 4.
ser = pd.Series(data=list('abcd'), index=[1, 2, 3, 4])
ser

1    a
2    b
3    c
4    d
dtype: object

In [39]:
# Write the Series values to a CSV file in the current working directory.
# index=False is the documented boolean way to omit the labels (index=None only works
# because it is falsy) and matches the DataFrame export that already uses index=False.
ser.to_csv("out_ser.csv", index=False)

Reference: *Python for Data Analysis*, Chapters 5 & 6