## How to read in data

In [28]:
import pandas as pd

### Creating Dataframe from Dictionary

-------------------------------------------------------------------------------------------------------------------------------

In [8]:
# Column-oriented input for a DataFrame: each key is a column label and each
# list holds that column's values, in row order.
data = dict(
    apples=[3, 2, 0, 1],
    oranges=[0, 3, 7, 2],
)

In [9]:
# Build a DataFrame from the dictionary: keys become the column labels and,
# since no index is supplied, the rows get the default integer labels.
purchases = pd.DataFrame(data=data)

purchases

Unnamed: 0,apples,oranges
0,3,0
1,2,3
2,0,7
3,1,2


### The Index of this DataFrame was given to us on creation as the numbers 0-3, but we could also create our own when we initialize the DataFrame.

### Let's have customer names as our index:

In [10]:
# Same data, but with explicit row labels (one customer name per row)
# replacing the default integer index.
customer_names = ['June', 'Robert', 'Lily', 'David']
purchases = pd.DataFrame(data, index=customer_names)

purchases

Unnamed: 0,apples,oranges
June,3,0
Robert,2,3
Lily,0,7
David,1,2


In [2]:
# Build a small DataFrame from a dict of equal-length lists.
# Ages are integer literals (not strings such as '21') so the column gets an
# integer dtype and can be summed, averaged or compared numerically.
df = pd.DataFrame({
    'Company': ['Google', 'Amazon', 'Infosys', 'Directi'],
    'Age': [21, 23, 38, 22],
})
print(df)

   Company Age
0   Google  21
1   Amazon  23
2  Infosys  38
3  Directi  22


In [4]:
# Same construction with one extra company and custom row labels passed via
# `index`. Ages are integer literals (not strings such as '21') so the column
# gets an integer dtype and supports numeric operations.
df = pd.DataFrame(
    {
        'Company': ['Google', 'Amazon', 'Yahoo', 'Infosys', 'Directi'],
        'Age': [21, 23, 24, 38, 22],
    },
    index=['Alpha', 'Beta', 'Gamma', 'Delta', 'alex'],
)
print(df)

       Company Age
Alpha   Google  21
Beta    Amazon  23
Gamma    Yahoo  24
Delta  Infosys  38
alex   Directi  22


-------------------------------------------------------------------------------------------------------------------------------

## Reading data from CSVs

<h3 style="color:green" align="left"> Exercise 1 </h3>

In [29]:
# NOTE(review): `url` is assigned but never passed to read_csv in this cell;
# the CSV below is located relative to the current working directory.
url = "C:/Users/deepusuresh/Documents/Data Science/01. Python/1. Practice/2. PANDAS/1. Data Frame"

# Read with default settings: the file's first column is loaded as an ordinary
# data column and pandas adds its own integer index.
df = pd.read_csv(filepath_or_buffer='purchases_index.csv')
df

Unnamed: 0.1,Unnamed: 0,apples,oranges
0,June,3,0
1,Robert,2,3
2,Lily,0,7
3,David,1,2


#### CSVs don't have indexes like our DataFrames, so all we need to do is just designate the index_col when reading:

In [30]:
# index_col=0 makes pandas use the first CSV column as the row index instead
# of generating a new integer one.
df = pd.read_csv(
    'purchases_index.csv',
    index_col=0,
)

df

Unnamed: 0,apples,oranges
June,3,0
Robert,2,3
Lily,0,7
David,1,2


#### Here we're setting the index to be column zero.
#### You'll find that most CSVs won't ever have an index column and so usually you don't have to worry about this step.

<h3 style="color:green" align="left"> Exercise 2 </h3>

#### Method 1

In [5]:
# Default read of the iris data set: the file's unlabeled first column is
# loaded as a regular column next to an auto-generated integer index.
df = pd.read_csv(
    "F:/03. Suresh/1. Material/05. Data Science/14. Jupyter/08. Data Sets/datasets/iris.csv"
)
df.head()

Unnamed: 0.1,Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,1,5.1,3.5,1.4,0.2,setosa
1,2,4.9,3.0,1.4,0.2,setosa
2,3,4.7,3.2,1.3,0.2,setosa
3,4,4.6,3.1,1.5,0.2,setosa
4,5,5.0,3.6,1.4,0.2,setosa


In [6]:
# Method 1: choose the index at read time by pointing index_col at column 0.
df = pd.read_csv(
    "F:/03. Suresh/1. Material/05. Data Science/14. Jupyter/08. Data Sets/datasets/iris.csv",
    index_col=0,
)
df.head()

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
1,5.1,3.5,1.4,0.2,setosa
2,4.9,3.0,1.4,0.2,setosa
3,4.7,3.2,1.3,0.2,setosa
4,4.6,3.1,1.5,0.2,setosa
5,5.0,3.6,1.4,0.2,setosa


#### Method 2

In [15]:
# Method 2 starts from a plain read with no index_col, so the unlabeled first
# column arrives as an ordinary data column.
df = pd.read_csv(
    "F:/03. Suresh/1. Material/05. Data Science/14. Jupyter/08. Data Sets/datasets/iris.csv"
)
df.head()

Unnamed: 0.1,Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,1,5.1,3.5,1.4,0.2,setosa
1,2,4.9,3.0,1.4,0.2,setosa
2,3,4.7,3.2,1.3,0.2,setosa
3,4,4.6,3.1,1.5,0.2,setosa
4,5,5.0,3.6,1.4,0.2,setosa


In [18]:
# Promote the 'Unnamed: 0' column to the row index after loading.
# The result is reassigned rather than using inplace=True: set_index returns a
# new frame, which keeps the call chainable and avoids mutating the existing
# object behind any other reference to it.
df = df.set_index('Unnamed: 0')
df.head()

Unnamed: 0_level_0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,5.1,3.5,1.4,0.2,setosa
2,4.9,3.0,1.4,0.2,setosa
3,4.7,3.2,1.3,0.2,setosa
4,4.6,3.1,1.5,0.2,setosa
5,5.0,3.6,1.4,0.2,setosa


In [19]:
# Clear the index label so no name is shown above the row labels.
df.index.name = None
df.head()

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
1,5.1,3.5,1.4,0.2,setosa
2,4.9,3.0,1.4,0.2,setosa
3,4.7,3.2,1.3,0.2,setosa
4,4.6,3.1,1.5,0.2,setosa
5,5.0,3.6,1.4,0.2,setosa


#### Method 3

In [27]:
# Method 3: header=0 marks the first line as the header row, and `names`
# replaces its labels with the five column names listed here.
iris_columns = [
    'Sepal.Length',
    'Sepal.Width',
    'Petal.Length',
    'Petal.Width',
    'Species',
]
df = pd.read_csv(
    "F:/03. Suresh/1. Material/05. Data Science/14. Jupyter/08. Data Sets/datasets/iris.csv",
    header=0,
    names=iris_columns,
)
df

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
1,5.1,3.5,1.4,0.2,setosa
2,4.9,3.0,1.4,0.2,setosa
3,4.7,3.2,1.3,0.2,setosa
4,4.6,3.1,1.5,0.2,setosa
5,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
146,6.7,3.0,5.2,2.3,virginica
147,6.3,2.5,5.0,1.9,virginica
148,6.5,3.0,5.2,2.0,virginica
149,6.2,3.4,5.4,2.3,virginica


<h3 style="color:green" align="left"> Exercise 3 </h3>

In [34]:
# NOTE(review): `url` is assigned but never passed to read_csv in this cell;
# the CSV below is located relative to the current working directory.
url = "C:/Users/deepusuresh/Documents/Data Science/01. Python/1. Practice/2. PANDAS/1. Data Frame"

# Plain read: every CSV column stays a data column and pandas adds its own
# integer index.
df = pd.read_csv(filepath_or_buffer='purchases.csv')
df

Unnamed: 0,Name,Unnamed: 1,apples,oranges
0,Jahn,Male,3,0
1,Andria,Female,2,3
2,Lily,Female,0,7
3,David,Male,1,2


In [35]:
# Select the index by position: column 0 of the file becomes the row index.
df = pd.read_csv(
    'purchases.csv',
    index_col=0,
)

df

Unnamed: 0_level_0,Unnamed: 1,apples,oranges
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jahn,Male,3,0
Andria,Female,2,3
Lily,Female,0,7
David,Male,1,2


In [36]:
# Select the index by label instead of position: the "Name" column becomes
# the row index.
df = pd.read_csv(
    'purchases.csv',
    index_col="Name",
)

df

Unnamed: 0_level_0,Unnamed: 1,apples,oranges
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jahn,Male,3,0
Andria,Female,2,3
Lily,Female,0,7
David,Male,1,2


-------------------------------------------------------------------------------------------------------------------------------