In [1]:
import numpy as np
import pandas as pd


### Creating a Pandas Series

In [17]:
# Twelve consecutive integers, 0 through 11, as a 1-D NumPy array.
arr = np.arange(0, 12)

In [18]:
# Wrap the array in a Series; no index is passed, so pandas assigns the
# default integer labels starting at 0.
s1 = pd.Series(data=arr)
print(s1)

0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
dtype: int64


In [6]:
# Numeric series built from a plain Python list of integers.
s = pd.Series(data=[2, 4, 5, 6, 9])
# A single print call, newline-separated: the series first, then its type.
print(s, type(s), sep='\n')

0    2
1    4
2    5
3    6
4    9
dtype: int64
<class 'pandas.core.series.Series'>


In [7]:
# Character series: a Series can hold strings as well as numbers.
char_series = pd.Series(data=['a', 'b', 'af'])
char_series

0     a
1     b
2    af
dtype: object

In [11]:
# Daily timestamps from 1 Jan 2018 through 10 Jan 2018; the strings are
# written as MM-DD-YYYY.
# Note: date_range returns a DatetimeIndex rather than a Series.
date_series = pd.date_range('01-01-2018', '01-10-2018', freq='D')
date_series

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10'],
              dtype='datetime64[ns]', freq='D')

#### Indexing Series

Indexing a Series works much like indexing a 1-D NumPy array: with the default index, labels start at 0.

In [23]:
# Shuffle the integers 0..11 so that index labels and values no longer
# coincide, which makes the indexing examples that follow easier to read.
# A seeded generator keeps the order identical on every Restart & Run All;
# an unseeded shuffle would give a different series on each run.
SEED = 42  # arbitrary fixed value; it only pins the shuffle order
arr = np.arange(12)
np.random.default_rng(SEED).shuffle(arr)  # shuffles `arr` in place
s1 = pd.Series(arr)
print(s1)

0      2
1      7
2      8
3      0
4      5
5      4
6      6
7      1
8     11
9      3
10     9
11    10
dtype: int64


In [24]:
# Look up the value stored under label 4; with the default integer index
# this returns the same element as s1[4].
s1.loc[4]

5

In [25]:
# Positional slice with an exclusive stop: rows 4, 5, 6 and 7
# (the same rows as s1[4:8]).
s1.iloc[4:8]

4    5
5    4
6    6
7    1
dtype: int64

In [26]:
# Pick several labels at once by passing a list
# (the same result as s1[[2, 4, 6]]).
s1.loc[[2, 4, 6]]

2    8
4    5
6    6
dtype: int64

#### Explicitly specifying indices

You might have noticed that when a Series is created, pandas automatically indexes it from 0 to (n-1), where n is the number of rows. If we want, we can also set the index explicitly by passing the `index` argument to `pd.Series()`.

In [29]:
# Replace the default integer labels with custom ones via `index`.
pd.Series(data=[1, 2, 4], index=['a', 'b', 'c'])

a    1
b    2
c    4
dtype: int64

In [34]:
# Ten copies of 'a', labelled explicitly with the integers 0 through 9.
pd.Series(np.full(10, 'a'), index=range(10))

0    a
1    a
2    a
3    a
4    a
5    a
6    a
7    a
8    a
9    a
dtype: object

In [35]:
# Two series whose labels only partly overlap:
# 'c' appears only in series1 and 'e' only in series2.
series1 = pd.Series(data=[5, 2, 3, 7], index=['a', 'b', 'c', 'd'])
series2 = pd.Series(data=[1, 6, 4, 9], index=['a', 'b', 'd', 'e'])

In [38]:
# Label-aligned addition. A label present in only one of the two series is
# treated as 0 in the other, so the result has no NaN for it.
series1.add(other=series2, fill_value=0)

a     6.0
b     8.0
c     3.0
d    11.0
e     9.0
dtype: float64

### DataFrame

In [39]:
# Build a DataFrame from a dict: each key becomes a column name and each
# list supplies that column's values.
d = {
    'col1': [1, 2],
    'col2': [3, 4],
}
df = pd.DataFrame(d)

In [40]:
# Bare last expression: the notebook renders the frame as this cell's output.
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [43]:
# The integers 1..9 laid out row by row in a 3x3 grid with named columns.
df2 = pd.DataFrame(
    data=np.arange(1, 10).reshape((3, 3)),
    columns=['a', 'b', 'c'],
)

In [44]:
# Preview the first rows; five is head()'s default row count.
df2.head(n=5)

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [46]:
# Load the iris table from a CSV file located relative to the working directory.
data = pd.read_csv('iris.csv')
# Preview only the first ten rows rather than rendering the whole frame.
data.head(10)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
6,4.6,3.4,1.4,0.3,setosa
7,5.0,3.4,1.5,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa


In [49]:
# DataFrame has no summary() method (calling it raises AttributeError);
# describe() is the pandas call that returns summary statistics.
data.describe()

In [4]:
# Fields in this file are separated by semicolons rather than commas.
# Note: this rebinds `data`, replacing the frame loaded from iris.csv.
data = pd.read_csv('winequality-red.csv', delimiter=';')

In [5]:
# Preview the first rows; five is head()'s default row count.
data.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
