# -- DATA MANIPULATION WITH PYTHON 

## * Numpy

In [2]:
import numpy as np 

In [5]:
a=np.arange(15).reshape(3,5)
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [6]:
print("Array Shape: "+ (str)(a.shape))
print("Array Number of axes: "+ (str)(a.ndim))
print("Array Data Type: "+ (str)(a.dtype))
print("Array Item Size Byte: "+ (str)(a.itemsize))
print("Array Number of All Elements: "+ (str)(a.size))
print("Data Type: "+ (str)(type(a)))


Array Shape: (3, 5)
Array Number of axes: 2
Array Data Type: int32
Array Item Size Byte: 4
Array Number of All Elements: 15
Data Type: <class 'numpy.ndarray'>


### //Array creation with numpy

In [12]:
b=np.array([(1,2,3),(4,5,6)],dtype=complex)
b

array([[1.+0.j, 2.+0.j, 3.+0.j],
       [4.+0.j, 5.+0.j, 6.+0.j]])

In [23]:
np.zeros((3,4),dtype=np.int16)

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]], dtype=int16)

In [22]:
np.ones((2,5),dtype=np.int16)

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]], dtype=int16)

In [21]:
np.empty((2,3)) ##  initial content is random and depends on the state of the memory

array([[0., 0., 0.],
       [0., 0., 0.]])

In [26]:
np.arange(0.0,10.0,1.5) ##initial-final-coefficient

array([0. , 1.5, 3. , 4.5, 6. , 7.5, 9. ])

In [29]:
np.linspace(0,2,9)

array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  ])

In [41]:
from numpy import pi
x = np.linspace( 0, 2*pi, 10 )
np.sin(x)

array([ 0.00000000e+00,  6.42787610e-01,  9.84807753e-01,  8.66025404e-01,
        3.42020143e-01, -3.42020143e-01, -8.66025404e-01, -9.84807753e-01,
       -6.42787610e-01, -2.44929360e-16])

### //Basic Operators

In [35]:
a*3

array([[ 0,  3,  6,  9, 12],
       [15, 18, 21, 24, 27],
       [30, 33, 36, 39, 42]])

In [39]:
a**a

array([[          1,           1,           4,          27,         256],
       [       3125,       46656,      823543,    16777216,   387420489],
       [ 1410065408,  1843829075,  -251658240, -1692154371, -1282129920]],
      dtype=int32)

In [40]:
a[2:5] # element index 2 to 5 print

array([[10, 11, 12, 13, 14]])

In [44]:
for i in a:
    print (i+5)

[5 6 7 8 9]
[10 11 12 13 14]
[15 16 17 18 19]


In [47]:
def f(x,y):
    return 10*x+y
functionArray=np.fromfunction(f,(2,2),dtype=int)
functionArray

array([[ 0,  1],
       [10, 11]])

### //Shaping

In [12]:
c=np.arange(0,15,2)
c.reshape(4,2).T #transposed

array([[ 0,  4,  8, 12],
       [ 2,  6, 10, 14]])

In [22]:
d=np.floor(10*np.random.random((2,6)))
d

array([[5., 0., 1., 1., 8., 1.],
       [5., 3., 6., 0., 7., 5.]])

In [24]:
np.hsplit(d,3)

[array([[5., 0.],
        [5., 3.]]),
 array([[1., 1.],
        [6., 0.]]),
 array([[8., 1.],
        [7., 5.]])]

In [25]:
e=d.copy()
e

array([[5., 0., 1., 1., 8., 1.],
       [5., 3., 6., 0., 7., 5.]])

In [27]:
e is d #complete copy (deep)

False

## ----------------------------------------------------------------------------------------------

## *PANDAS

In [29]:
import numpy as np
import pandas as pd

In [31]:
s=pd.Series([1,2,3,4,np.nan,6])
s

0    1.0
1    2.0
2    3.0
3    4.0
4    NaN
5    6.0
dtype: float64

In [32]:
s.axes

[RangeIndex(start=0, stop=6, step=1)]

In [33]:
s.dtype

dtype('float64')

In [34]:
s.size

6

In [35]:
s.ndim

1

In [36]:
s.values

array([ 1.,  2.,  3.,  4., nan,  6.])

In [37]:
s.head(3)

0    1.0
1    2.0
2    3.0
dtype: float64

In [38]:
s.tail(3)

3    4.0
4    NaN
5    6.0
dtype: float64

In [43]:
seri=pd.Series([1,2,3],index=['a','b','c'])

In [45]:
seri['b']

2

In [47]:
pd.concat([seri,seri])

a    1
b    2
c    3
a    1
b    2
c    3
dtype: int64

In [49]:
'a' in seri

True

### //Data Frames

In [51]:
ilist=[1,2,3,4]
a=pd.DataFrame(ilist,columns=['numbers'])
a

Unnamed: 0,numbers
0,1
1,2
2,3
3,4


In [52]:
a.columns

Index(['numbers'], dtype='object')

In [53]:
a.values

array([[1],
       [2],
       [3],
       [4]], dtype=int64)

In [54]:
a.loc[0:3]

Unnamed: 0,numbers
0,1
1,2
2,3
3,4


In [55]:
a.iloc[0:3]

Unnamed: 0,numbers
0,1
1,2
2,3


In [57]:
import seaborn as sea #ready data sets
df=sea.load_dataset("planets")
df

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.300000,7.10,77.40,2006
1,Radial Velocity,1,874.774000,2.21,56.95,2008
2,Radial Velocity,1,763.000000,2.60,19.84,2011
3,Radial Velocity,1,326.030000,19.40,110.62,2007
4,Radial Velocity,1,516.220000,10.50,119.47,2009
...,...,...,...,...,...,...
1030,Transit,1,3.941507,,172.00,2006
1031,Transit,1,2.615864,,148.00,2007
1032,Transit,1,3.191524,,174.00,2007
1033,Transit,1,4.125083,,293.00,2008


In [58]:
df.mean()

number               1.785507
orbital_period    2002.917596
mass                 2.638161
distance           264.069282
year              2009.070531
dtype: float64

In [59]:
df['mass'].mean()

2.6381605847953233

In [60]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,1035.0,1.785507,1.240976,1.0,1.0,1.0,2.0,7.0
orbital_period,992.0,2002.917596,26014.728304,0.090706,5.44254,39.9795,526.005,730000.0
mass,513.0,2.638161,3.818617,0.0036,0.229,1.26,3.04,25.0
distance,808.0,264.069282,733.116493,1.35,32.56,55.25,178.5,8500.0
year,1035.0,2009.070531,3.972567,1989.0,2007.0,2010.0,2012.0,2014.0


In [62]:
df.groupby('method').mean()

Unnamed: 0_level_0,number,orbital_period,mass,distance,year
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Astrometry,1.0,631.18,,17.875,2011.5
Eclipse Timing Variations,1.666667,4751.644444,5.125,315.36,2010.0
Imaging,1.315789,118247.7375,,67.715937,2009.131579
Microlensing,1.173913,3153.571429,,4144.0,2009.782609
Orbital Brightness Modulation,1.666667,0.709307,,1180.0,2011.666667
Pulsar Timing,2.2,7343.021201,,1200.0,1998.4
Pulsation Timing Variations,1.0,1170.0,,,2007.0
Radial Velocity,1.721519,823.35468,2.630699,51.600208,2007.518987
Transit,1.95466,21.102073,1.47,599.29808,2011.236776
Transit Timing Variations,2.25,79.7835,,1104.333333,2012.5


In [64]:
df.groupby('method').aggregate(['min',np.median,'max'])

Unnamed: 0_level_0,number,number,number,orbital_period,orbital_period,orbital_period,mass,mass,mass,distance,distance,distance,year,year,year
Unnamed: 0_level_1,min,median,max,min,median,max,min,median,max,min,median,max,min,median,max
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
Astrometry,1,1,1,246.36,631.18,1016.0,,,,14.98,17.875,20.77,2010,2011.5,2013
Eclipse Timing Variations,1,2,2,1916.25,4343.5,10220.0,4.2,5.125,6.05,130.72,315.36,500.0,2008,2010.0,2012
Imaging,1,1,4,4639.15,27500.0,730000.0,,,,7.69,40.395,165.0,2004,2009.0,2013
Microlensing,1,1,2,1825.0,3300.0,5100.0,,,,1760.0,3840.0,7720.0,2004,2010.0,2013
Orbital Brightness Modulation,1,2,2,0.240104,0.342887,1.544929,,,,1180.0,1180.0,1180.0,2011,2011.0,2013
Pulsar Timing,1,3,3,0.090706,66.5419,36525.0,,,,1200.0,1200.0,1200.0,1992,1994.0,2011
Pulsation Timing Variations,1,1,1,1170.0,1170.0,1170.0,,,,,,,2007,2007.0,2007
Radial Velocity,1,1,6,0.73654,360.2,17337.5,0.0036,1.26,25.0,1.35,40.445,354.0,1989,2009.0,2014
Transit,1,1,7,0.355,5.714932,331.60059,1.47,1.47,1.47,38.0,341.0,8500.0,2002,2012.0,2014
Transit Timing Variations,2,2,3,22.3395,57.011,160.0,,,,339.0,855.0,2119.0,2011,2012.5,2014
