# Copy and View in Numpy

In [4]:
# View: arr.view() returns a second array object over the same data.
import numpy as np

arr = np.array([1, 2, 3, 4, 5, 6])
x = arr.view()

# Identity of each object, one per line.
print(id(arr), id(x), sep="\n")

# Write through the view, then show both arrays for comparison.
x[0] = 100

print(arr, x, sep="\n")

2078370692720
2078370692624
[100   2   3   4   5   6]
[100   2   3   4   5   6]


In [6]:
# assignment (alias), not a view: `x = arr` binds a second name to the same array object
import numpy as np

arr = np.array([1,2,3,4,5,6])
x = arr

# both names refer to one object, so the two ids printed here are identical
print(id(arr))
print(id(x))

# assigning through x therefore also shows up when arr is printed
x[0] = 100

print(arr)
print(x)

2078361991664
2078361991664
[100   2   3   4   5   6]
[100   2   3   4   5   6]


In [2]:
# Plain Python list: assignment makes `b` another name for the same list.
a = [1, 2, 3, 4, 5]
b = a

print(id(a), id(b), sep="\n")

# Mutate through `b`, then show both names.
b[1] = 100

print(a, b, sep="\n")

2078371212608
2078371212608
[1, 100, 3, 4, 5]
[1, 100, 3, 4, 5]


In [8]:
# Copy: arr.copy() returns an independent array with its own data.
import numpy as np

arr = np.array([1, 2, 3, 4, 5, 6])
x = arr.copy()

# Identity of each object, one per line.
print(id(arr), id(x), sep="\n")

# Modify the copy only, then show both arrays.
x[0] = 100

print(arr, x, sep="\n")

2078370692720
2078370691664
[1 2 3 4 5 6]
[100   2   3   4   5   6]


In [9]:
# Copy of a 2-D array: the copy has its own data, so edits stay local to it.
import numpy as np

arr = np.array([[1, 2, 3], [4, 5, 6]])
x = arr.copy()

# Identity of each object, one per line.
print(id(arr), id(x), sep="\n")

# Set row 0, column 1 of the copy.
x[0, 1] = 100

print(arr, x, sep="\n")

2078362105104
2078370692720
[[1 2 3]
 [4 5 6]]
[[  1 100   3]
 [  4   5   6]]


In [12]:
# view (not a copy): this cell calls arr.view(), so the write below is visible through arr too
import numpy as np

arr = np.array([1,2,3,4,5,6])
x = arr.view()

# the view is a separate Python object, so the two ids differ
print(id(arr))
print(id(x))

x[0] = 100

# both print [100 2 3 4 5 6]
print(arr)
print(x)

2078370692816
2078370691664
[100   2   3   4   5   6]
[100   2   3   4   5   6]


#### The main difference between a copy and a view of an array is that the copy is a new array with its own data, while the view is a new array object that looks at the original array's data.

1. The copy owns the data, so any changes made to the copy will not affect the original array, and any changes made to the original array will not affect the copy.

2. The view does not own the data, so any changes made to the view will affect the original array, and any changes made to the original array will affect the view.


In [22]:
# View: a write through `x` is also seen through `arr`.

arr = np.array([1, 2, 3, 4, 5, 6])
x = arr.view()

print(id(arr), id(x), sep="\n")
x[0] = 100

print(arr, x, sep="\n")

2078361991664
2078370692336
[100   2   3   4   5   6]
[100   2   3   4   5   6]


In [14]:
# Copy: a write to `x` leaves `arr` untouched.

arr = np.array([1, 2, 3, 4, 5, 6])
x = arr.copy()

print(id(arr), id(x), sep="\n")

x[0] = 100

print(arr, x, sep="\n")

2078361991664
2078370692336
[1 2 3 4 5 6]
[100   2   3   4   5   6]


# Pandas

In [None]:
- Pandas presents data as labelled rows and columns, which is easier to read than a bare NumPy array
- Can store heterogeneous data (different data types in different columns)

In [23]:
import pandas as pd

In [None]:
# Pandas has 2 main data structures

* Series ---> 1D
* DataFrame ----> 2D

# Series 

In [None]:
Different ways to create a Series

1. List
2. NumPy array
3. Dictionary (dict)

In [24]:
# List: river names used as the data for the first Series examples.

river = [
    'Ganga',
    'Yamuna',
    'neil',
    'kaveri',
    'godavari',
]

In [25]:
# build a Series from the list; no index is given, so the default 0..4 integer index is used
river_name = pd.Series(river)
print(type(river_name))

<class 'pandas.core.series.Series'>


In [26]:
river_name

0       Ganga
1      Yamuna
2        neil
3      kaveri
4    godavari
dtype: object

In [27]:
# array

# Series built from a NumPy array of 5 values drawn with np.random.randn;
# no seed is set in the visible cells, so the numbers differ on every run
random_series = pd.Series(np.random.randn(5))
random_series

0   -1.070910
1    1.806144
2   -1.076364
3    0.616349
4    0.073275
dtype: float64

In [28]:

# explicit index labels: one label per value (5 values, 5 labels)
random_series = pd.Series(np.random.randn(5) , index = ['a','b','c','d','e'])
random_series

a   -2.265168
b    0.542559
c    0.770359
d    1.706033
e   -0.049882
dtype: float64

In [29]:

# Deliberate error demo: 5 values but only 4 index labels.
# pd.Series raises ValueError when the index length differs from the number of
# values. The error is caught and printed so the notebook still runs top to
# bottom; random_series is not reassigned by this cell.
five_values = np.random.randn(5)
four_labels = ['a', 'b', 'c', 'd']
try:
    random_series = pd.Series(five_values, index=four_labels)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Length of values (5) does not match length of index (4)

In [31]:

# Deliberate error demo: 5 values but 6 index labels.
# pd.Series raises ValueError when the index length differs from the number of
# values. The error is caught and printed so the notebook still runs top to
# bottom; random_series is not reassigned by this cell.
five_values = np.random.randn(5)
six_labels = ['a', 'b', 'c', 'd', 'e', 'f']
try:
    random_series = pd.Series(five_values, index=six_labels)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Length of values (5) does not match length of index (6)

In [33]:
river_name

0       Ganga
1      Yamuna
2        neil
3      kaveri
4    godavari
dtype: object

In [34]:
# rebuild the Series with string labels 'a'..'e' in place of the default integer index
river_name = pd.Series(river , index = ['a','b','c','d','e'])

In [35]:
river_name

a       Ganga
b      Yamuna
c        neil
d      kaveri
e    godavari
dtype: object

In [36]:
# Positional lookup: the index labels are strings ('a'..'e'), so use .iloc to
# ask for the element at integer position 3. Plain river_name[3] relies on a
# deprecated fallback that treats an integer key as a position.
river_name.iloc[3]

'kaveri'

In [37]:
river_name['d']

'kaveri'

In [39]:
river_name['d'] = 'Krishna'

In [40]:
river_name

a       Ganga
b      Yamuna
c        neil
d     Krishna
e    godavari
dtype: object

In [41]:
# integer slice on a string-labelled Series: taken as positions 2 and 3 (stop position excluded)
river_name[2:4]

c       neil
d    Krishna
dtype: object

In [42]:
river_name[::-1]

e    godavari
d     Krishna
c        neil
b      Yamuna
a       Ganga
dtype: object

In [44]:
river_name

a       Ganga
b      Yamuna
c        neil
d     Krishna
e    godavari
dtype: object

In [45]:
river_name['b':'e']

b      Yamuna
c        neil
d     Krishna
e    godavari
dtype: object

In [46]:

river_name['b':'c']

b    Yamuna
c      neil
dtype: object

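1. Slicing by integer position (as with the default index): [inclusive : exclusive], so the stop position is left out.
2. Slicing by user-defined index labels: [inclusive : inclusive], so the stop label is included.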

In [47]:

# label slice with step -1: walks backwards from 'c' to 'b', both labels included
river_name['c':'b':-1]

c      neil
b    Yamuna
dtype: object

In [49]:

# label slice with step 2: every second row from 'a' through 'e'
river_name['a':'e':2]

a       Ganga
c        neil
e    godavari
dtype: object

In [48]:
river_name

a       Ganga
b      Yamuna
c        neil
d     Krishna
e    godavari
dtype: object

In [85]:
# BRICS member countries and their currencies, in matching order.
brics_country = ['Brazil', 'Russia', 'India', 'China', 'South Africa']

brics_currency = ['Real', 'Ruble', 'Rupee', 'Renminbi', 'Rand' ]

# General form (template only, kept as a comment because the placeholder
# names are not defined): pd.Series(values, index=labels)

In [51]:
# Series whose values are the countries and whose index labels are the currencies
data = pd.Series(brics_country , index = brics_currency)
data

Real              Brazil
Ruble             Russia
Rupee              India
Renminbi           China
Rand        South Africa
dtype: object

In [52]:
# If you want to see only the index data

data.index

Index(['Real', 'Ruble', 'Rupee', 'Renminbi', 'Rand'], dtype='object')

In [53]:
# If you want to see only the values data

data.values

array(['Brazil', 'Russia', 'India', 'China', 'South Africa'], dtype=object)

In [54]:
# .values hands back the Series data as a NumPy ndarray (see the printed type)
a = data.values
print(type(a))

<class 'numpy.ndarray'>


In [None]:
pd.Series()

In [55]:
# Sample data: four countries with their capitals and currencies, in matching order.
# NOTE: `contry` is a misspelling of "country"; the name is kept unchanged here
# because the cell that builds `data_series` refers to it.
contry = ['Afghanistan', 'Albania', 'Algeria', 'Andorra']

capital = ['Kabul', 'Tirane', 'Algiers', 'Andorra la Vella' ]

currency = ['Afghani', 'Lek' ,'Dinar' ,'Euro']

In [75]:
# passing a list of two lists as `index` gives a two-level MultiIndex:
# each capital is labelled by a (country, currency) pair
data_series = pd.Series(capital , index=[contry, currency], dtype= 'str')
data_series

Afghanistan  Afghani               Kabul
Albania      Lek                  Tirane
Algeria      Dinar               Algiers
Andorra      Euro       Andorra la Vella
dtype: object

In [63]:
print(type(data_series))

<class 'pandas.core.series.Series'>


In [64]:
data_series.index

MultiIndex([('Afghanistan', 'Afghani'),
            (    'Albania',     'Lek'),
            (    'Algeria',   'Dinar'),
            (    'Andorra',    'Euro')],
           )

In [65]:
data_series.values

array(['Kabul', 'Tirane', 'Algiers', 'Andorra la Vella'], dtype=object)

In [66]:
data_series['Albania']

Lek    Tirane
dtype: object

In [73]:
# a dict may mix key types: here a tuple key (with a tuple value) sits next to string keys
dic = {(1,2,3):(1,2,43), 'b':2 , 'c':4 }

dic

{(1, 2, 3): (1, 2, 43), 'b': 2, 'c': 4}

In [79]:
dic = {'a':1 , 'b':2 , 'c':4 }

dic

{'a': 1, 'b': 2, 'c': 4}

In [77]:
# Series from a dict: keys become the index labels, values become the data
data = pd.Series(dic)
data

a    1
b    2
c    4
dtype: int64

In [82]:
# dict plus explicit index: only the listed labels are kept; labels that are
# not dict keys ('d', 'e') get NaN, and the dtype becomes float64
data = pd.Series(dic , index = ['b', 'd' , 'e'])
data

b    2.0
d    NaN
e    NaN
dtype: float64

In [83]:
data_series

Afghanistan  Afghani               Kabul
Albania      Lek                  Tirane
Algeria      Dinar               Algiers
Andorra      Euro       Andorra la Vella
dtype: object

In [88]:
river_name

a       Ganga
b      Yamuna
c        neil
d     Krishna
e    godavari
dtype: object

In [89]:
data

b    2.0
d    NaN
e    NaN
dtype: float64

In [90]:
# stack the two Series end to end; index labels are kept as they are, so
# 'b', 'd' and 'e' each appear twice, and the combined dtype is object
pd.concat([river_name , data])

a       Ganga
b      Yamuna
c        neil
d     Krishna
e    godavari
b         2.0
d         NaN
e         NaN
dtype: object

# DataFrame

In [97]:
# Sample data for the DataFrame examples: three parallel lists, one entry per country.
country = [
    'Afghanistan',
    'Albania',
    'Algeria',
    'Andorra',
]

capital = [
    'Kabul',
    'Tirane',
    'Algiers',
    'Andorra la Vella',
]

currency = [
    'Afghani',
    'Lek',
    'Dinar',
    'Euro',
]

In [95]:
# a flat list becomes a single column labelled 0, with one row per element
data = pd.DataFrame(country)
data

Unnamed: 0,0
0,Afghanistan
1,Albania
2,Algeria
3,Andorra


In [100]:
data = pd.DataFrame(country )
data

Unnamed: 0,0
0,Afghanistan
1,Albania
2,Algeria
3,Andorra


In [96]:
# a list of lists: each inner list becomes one ROW, giving 3 rows x 4 columns
data = pd.DataFrame([country ,capital , currency])
data

Unnamed: 0,0,1,2,3
0,Afghanistan,Albania,Algeria,Andorra
1,Kabul,Tirane,Algiers,Andorra la Vella
2,Afghani,Lek,Dinar,Euro


In [118]:
# NOTE(review): absolute, machine-specific Windows path; this fails on any
# machine that lacks this exact folder. A later cell reads
# 'datasets/ted_data.csv' through a relative path instead.
df = pd.read_csv('C://Users//MIT//Desktop//datasets/ted_data.csv')

In [119]:
df

Unnamed: 0,name_speaker,speaker_occupation,title,views,comments
0,Ken Robinson,Author/educator,Do schools kill creativity?,47227110,4553
1,Al Gore,Climate advocate,Averting the climate crisis,3200520,265
2,David Pogue,Technology columnist,Simplicity sells,1636292,124
3,Majora Carter,Activist for environmental justice,Greening the ghetto,1697550,200
4,Hans Rosling,Global health expert; data visionary,The best stats you've ever seen,12005869,593
5,Tony Robbins,Life coach; expert in leadership psychology,Why we do what we do,20685401,672
6,Julia Sweeney,"Actor, comedian, playwright",Letting go of God,3769987,919
7,Joshua Prince-Ramus,Architect,Behind the design of Seattle's library,967741,46
8,Dan Dennett,"Philosopher, cognitive scientist",Let's teach religion -- all religion -- in sch...,2567958,582
9,Rick Warren,"Pastor, author",A life of purpose,3095993,900


In [120]:
# relative path: resolved against the current working directory of the notebook
df = pd.read_csv('datasets/ted_data.csv')

In [121]:
df

Unnamed: 0,name_speaker,speaker_occupation,title,views,comments
0,Ken Robinson,Author/educator,Do schools kill creativity?,47227110,4553
1,Al Gore,Climate advocate,Averting the climate crisis,3200520,265
2,David Pogue,Technology columnist,Simplicity sells,1636292,124
3,Majora Carter,Activist for environmental justice,Greening the ghetto,1697550,200
4,Hans Rosling,Global health expert; data visionary,The best stats you've ever seen,12005869,593
5,Tony Robbins,Life coach; expert in leadership psychology,Why we do what we do,20685401,672
6,Julia Sweeney,"Actor, comedian, playwright",Letting go of God,3769987,919
7,Joshua Prince-Ramus,Architect,Behind the design of Seattle's library,967741,46
8,Dan Dennett,"Philosopher, cognitive scientist",Let's teach religion -- all religion -- in sch...,2567958,582
9,Rick Warren,"Pastor, author",A life of purpose,3095993,900


In [122]:
# Excel: load the workbook with pd.read_excel (relative path) and display the resulting DataFrame
df_excel = pd.read_excel('datasets/football_worldcup.xlsx')
df_excel

Unnamed: 0,Year,Country,Winner,Runners-Up,GoalsScored,MatchesPlayed
0,1990,Italy,Germany,Argentina,115,52
1,1994,USA,Brazil,Italy,141,52
2,1998,France,France,Brazil,171,64
3,2002,Japan,Brazil,Germany,161,64
4,2006,Germany,Italy,France,147,64
5,2010,South Africa,Spain,Netherlands,145,64
6,2014,Brazil,Germany,Argentina,171,64


In [123]:
# deliberately uneven input: 'Year' has 5 entries while the other columns have 4
df_dict = {'Year' : [1990, 1994, 1998, 2002 , 2004],
           'Country' : ['Italy', 'USA', 'France', 'Japan'],
           'Winner' : ['Germany', 'Brazil', 'France', 'Brazil'],
           'GoalScored' : [115, 141, 171, 161]
          }

In [124]:
# Deliberate error demo: pd.DataFrame raises ValueError when the lists in the
# dict do not all have the same length. The error is caught and printed so the
# notebook still runs top to bottom; `data` is not reassigned by this cell.
try:
    data = pd.DataFrame(df_dict)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: All arrays must be of the same length

In [125]:
# Column name -> list of values; every column has exactly 4 entries.
df_dict = {
    'Year': [1990, 1994, 1998, 2002],
    'Country': ['Italy', 'USA', 'France', 'Japan'],
    'Winner': ['Germany', 'Brazil', 'France', 'Brazil'],
    'GoalScored': [115, 141, 171, 161],
}

In [126]:
# dict of lists: each key becomes a column name, each list becomes that column's values
data = pd.DataFrame(df_dict)
data

Unnamed: 0,Year,Country,Winner,GoalScored
0,1990,Italy,Germany,115
1,1994,USA,Brazil,141
2,1998,France,France,171
3,2002,Japan,Brazil,161


In [129]:
# same columns, but each one wrapped in a Series; the lengths are uneven again
# ('Year' has 5 values, the other columns have 4)
df_dict = {'Year' : pd.Series([1990, 1994, 1998, 2002 , 2004]),
           'Country' : pd.Series(['Italy', 'USA', 'France', 'Japan']),
           'Winner' : pd.Series(['Germany', 'Brazil', 'France', 'Brazil']),
           'GoalScored' : pd.Series([115, 141, 171, 161])
          }

In [130]:
# with Series as columns the uneven lengths are accepted: row 4 exists only
# for 'Year', the other columns are left empty there, and 'GoalScored' is
# displayed as float (115.0, ...)
data = pd.DataFrame(df_dict)
data

Unnamed: 0,Year,Country,Winner,GoalScored
0,1990,Italy,Germany,115.0
1,1994,USA,Brazil,141.0
2,1998,France,France,171.0
3,2002,Japan,Brazil,161.0
4,2004,,,


In [138]:
brics_country

['Brazil', 'Russia', 'India', 'China', 'South Africa']

In [139]:
brics_currency

['Real', 'Ruble', 'Rupee', 'Renminbi', 'Rand']

In [140]:
# list of lists -> 2 rows x 5 columns (countries in row 0, currencies in row 1)
data = pd.DataFrame([brics_country,brics_currency])
data

Unnamed: 0,0,1,2,3,4
0,Brazil,Russia,India,China,South Africa
1,Real,Ruble,Rupee,Renminbi,Rand


In [None]:
country -->
currency -->

In [141]:
# column name -> list of values, so each list ends up as a column rather than a row
df_dict = {'Country': brics_country,
          'currency': brics_currency}

In [142]:
# dict of lists -> one column per key ('Country', 'currency'), one row per list position
data = pd.DataFrame(df_dict)
data

Unnamed: 0,Country,currency
0,Brazil,Real
1,Russia,Ruble
2,India,Rupee
3,China,Renminbi
4,South Africa,Rand


In [149]:
# extended lists for the next example: np.nan marks the missing currency at position 4
# NOTE(review): the trailing 'country' / 'values' entries look like placeholders rather than real data; confirm
brics_country = ['Brazil', 'Russia', 'India', 'China', 'South Africa' , 'country']

brics_currency = ['Real', 'Ruble', 'Rupee', 'Renminbi' ,np.nan ,'values']

In [150]:
# wrap each list in a Series before using it as a DataFrame column
df_dict = {'Country': pd.Series(brics_country),
          'currency': pd.Series(brics_currency)}

In [151]:
# build the frame; the np.nan entry shows up as an empty 'currency' cell in row 4
data = pd.DataFrame(df_dict)
data

Unnamed: 0,Country,currency
0,Brazil,Real
1,Russia,Ruble
2,India,Rupee
3,China,Renminbi
4,South Africa,
5,country,values
