'''Dataframes

In [1]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [2]:
# Axis labels: four row labels and three column labels
my_rows = ['A', 'B', 'C', 'D']
my_cols = ['Monday', 'Tuesday', 'Wednesday']

# Random sample array sized to the labels: 4 rows x 3 columns
my_data = randn(len(my_rows), len(my_cols))

# Build the dataframe, naming the index and column arguments explicitly
my_df = pd.DataFrame(data=my_data, index=my_rows, columns=my_cols)
print(my_df)

     Monday   Tuesday  Wednesday
A  0.300911 -0.957660   0.871146
B -0.392538 -0.534399  -0.867022
C  0.653339  1.269126   0.766307
D  0.833301  0.237713   1.947042


In [3]:
# Load the SIC-code lookup table from a CSV file.
# The path is relative, so 'industry_sic.csv' must be in the working directory.
my_df2= pd.read_csv('industry_sic.csv')
# Printing the whole frame shows a truncated view: first/last rows plus its shape.
print(my_df2)

     SIC Code                                        Description
0        1110  Growing of cereals (except rice), leguminous c...
1        1120                                    Growing of rice
2        1130  Growing of vegetables and melons, roots and tu...
3        1140                              Growing of sugar cane
4        1150                                 Growing of tobacco
..        ...                                                ...
726     98000                      Residents property management
727     98100  Undifferentiated goods-producing activities of...
728     98200  Undifferentiated service-producing activities ...
729     99000  Activities of extraterritorial organizations a...
730     99999                                    Dormant Company

[731 rows x 2 columns]


In [5]:
# Pull out a single row by its index label with .loc.
# A single label returns the row as a Series indexed by the column names.
print(my_df2.loc[0])



SIC Code                                                    1110
Description    Growing of cereals (except rice), leguminous c...
Name: 0, dtype: object


In [7]:
# Pull out multiple rows by passing a list of index labels to .loc.
# The rows come back as a DataFrame in the order the labels are listed (0, 5, 2).

print(my_df2.loc[[0,5,2]])

   SIC Code                                        Description
0      1110  Growing of cereals (except rice), leguminous c...
5      1160                             Growing of fibre crops
2      1130  Growing of vegetables and melons, roots and tu...


In [9]:
# Grab the first 5 rows (head() returns 5 rows when called without an argument)
print(my_df2.head())

   SIC Code                                        Description
0      1110  Growing of cereals (except rice), leguminous c...
1      1120                                    Growing of rice
2      1130  Growing of vegetables and melons, roots and tu...
3      1140                              Growing of sugar cane
4      1150                                 Growing of tobacco


In [10]:
# Grab the last 5 rows (tail() returns 5 rows when called without an argument)
print(my_df2.tail())

     SIC Code                                        Description
726     98000                      Residents property management
727     98100  Undifferentiated goods-producing activities of...
728     98200  Undifferentiated service-producing activities ...
729     99000  Activities of extraterritorial organizations a...
730     99999                                    Dormant Company


In [11]:
# Grab a chosen number of rows from the top (here the first 7)
print(my_df2.head(7))

   SIC Code                                        Description
0      1110  Growing of cereals (except rice), leguminous c...
1      1120                                    Growing of rice
2      1130  Growing of vegetables and melons, roots and tu...
3      1140                              Growing of sugar cane
4      1150                                 Growing of tobacco
5      1160                             Growing of fibre crops
6      1190               Growing of other non-perennial crops


In [13]:
# Grab a chosen number of rows from the bottom (here the last 7)
print(my_df2.tail(7))

     SIC Code                                        Description
724     96090                    Other service activities n.e.c.
725     97000  Activities of households as employers of domes...
726     98000                      Residents property management
727     98100  Undifferentiated goods-producing activities of...
728     98200  Undifferentiated service-producing activities ...
729     99000  Activities of extraterritorial organizations a...
730     99999                                    Dormant Company


In [15]:
# Grab information about the dataframe: index range, columns, non-null counts,
# dtypes and memory usage.
# info() writes its report to the output itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
my_df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 731 entries, 0 to 730
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   SIC Code     731 non-null    int64 
 1   Description  731 non-null    object
dtypes: int64(1), object(1)
memory usage: 11.5+ KB
None


In [17]:
# Get the dimensions of the dataframe as a (rows, columns) tuple
print(my_df2.shape)

(731, 2)


In [22]:
# Get the number of dimensions (axes); this dataframe has 2 (rows and columns)
print(my_df2.ndim)

2


In [None]:
# Get the column names (returned as a pandas Index object)
print(my_df2.columns)

Index(['SIC Code', 'Description'], dtype='object')


In [24]:
# Get the datatype of each column (returned as a Series keyed by column name)
print(my_df2.dtypes)

SIC Code        int64
Description    object
dtype: object


In [25]:
# Get summary statistics: count, mean, std, min, quartiles and max.
# Only the numeric 'SIC Code' column is summarised; the text column is left out.

print(my_df2.describe())

           SIC Code
count    731.000000
mean   43925.389877
std    26478.315319
min     1110.000000
25%    23465.000000
50%    46120.000000
75%    64301.500000
max    99999.000000


In [27]:
# Describe a specific text (object) column: count, number of unique values,
# the most frequent value (top) and how often it occurs (freq)
my_df2['Description'].describe()

count                 731
unique                731
top       Dormant Company
freq                    1
Name: Description, dtype: object

In [28]:
# Describe a specific numeric column: count, mean, std, min, quartiles and max
my_df2['SIC Code'].describe()

count      731.000000
mean     43925.389877
std      26478.315319
min       1110.000000
25%      23465.000000
50%      46120.000000
75%      64301.500000
max      99999.000000
Name: SIC Code, dtype: float64

In [29]:
# Select a single column by name using brackets; the result is a Series
print(my_df2['SIC Code'])

0       1110
1       1120
2       1130
3       1140
4       1150
       ...  
726    98000
727    98100
728    98200
729    99000
730    99999
Name: SIC Code, Length: 731, dtype: int64


In [32]:
# Select a single column by integer position with .iloc: all rows (:), column 0
my_df2.iloc[:,0]

0       1110
1       1120
2       1130
3       1140
4       1150
       ...  
726    98000
727    98100
728    98200
729    99000
730    99999
Name: SIC Code, Length: 731, dtype: int64