In [1]:
import pandas as pd

In [2]:
# Show every column when a DataFrame is displayed (None removes the limit).
# Call the public pd.set_option directly instead of going through pd.pandas.
pd.set_option('display.max_columns', None)

In [3]:
# Load the funding records from the CSV into `df`, the frame used by the
# following cells.
df = pd.read_csv(filepath_or_buffer="startup_funding.csv")

# Preview the first five rows (5 is head()'s default).
df.head(n=5)

Unnamed: 0,Sr No,Date dd/mm/yyyy,Startup Name,Industry Vertical,SubVertical,City Location,Investors Name,InvestmentnType,Amount in USD,Remarks
0,1,09/01/2020,BYJU’S,E-Tech,E-learning,Bengaluru,Tiger Global Management,Private Equity Round,200000000,
1,2,13/01/2020,Shuttl,Transportation,App based shuttle service,Gurgaon,Susquehanna Growth Equity,Series C,8048394,
2,3,09/01/2020,Mamaearth,E-commerce,Retailer of baby and toddler products,Bengaluru,Sequoia Capital India,Series B,18358860,
3,4,02/01/2020,https://www.wealthbucket.in/,FinTech,Online Investment,New Delhi,Vinod Khatumal,Pre-series A,3000000,
4,5,02/01/2020,Fashor,Fashion and Apparel,Embroiled Clothes For Women,Mumbai,Sprout Venture Partners,Seed Round,1800000,


In [4]:
# (rows, columns) of the loaded frame; as the cell's last expression it is
# displayed without needing print().
df.shape

(3044, 10)


In [5]:
# Inspect the dtype pandas inferred for each column.
df.dtypes

Sr No                 int64
Date dd/mm/yyyy      object
Startup Name         object
Industry Vertical    object
SubVertical          object
City  Location       object
Investors Name       object
InvestmentnType      object
Amount in USD        object
Remarks              object
dtype: object

<div class="alert alert-block alert-warning">
    'Amount in USD' and 'Date dd/mm/yyyy' were both read in as <code>object</code> (string) columns. 'Amount in USD' should be numeric, and 'Date dd/mm/yyyy' should be a datetime type.
</div>

In [6]:
# Strip thousands separators and surrounding whitespace before parsing.
# astype(str) keeps this cell re-runnable: once the column has been converted
# to float64, the .str accessor would otherwise raise AttributeError on a
# second execution.
amount_text = (
    df['Amount in USD']
    .astype(str)
    .str.replace(',', '', regex=False)
    .str.strip()
)

# errors='coerce' turns anything that still is not a number (including
# missing values) into NaN instead of raising.
df['Amount in USD'] = pd.to_numeric(amount_text, errors='coerce')
df.dtypes

Sr No                  int64
Date dd/mm/yyyy       object
Startup Name          object
Industry Vertical     object
SubVertical           object
City  Location        object
Investors Name        object
InvestmentnType       object
Amount in USD        float64
Remarks               object
dtype: object

In [7]:
# Parse the date strings as day-first, matching the dd/mm/yyyy layout named in
# the column header; values that cannot be parsed become NaT (errors='coerce').
date_col = 'Date dd/mm/yyyy'
df[date_col] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True)
df[date_col].head()

0   2020-01-09
1   2020-01-13
2   2020-01-09
3   2020-01-02
4   2020-01-02
Name: Date dd/mm/yyyy, dtype: datetime64[ns]

In [8]:
# Preview the first five rows after the amount and date conversions.
df.head(n=5)

Unnamed: 0,Sr No,Date dd/mm/yyyy,Startup Name,Industry Vertical,SubVertical,City Location,Investors Name,InvestmentnType,Amount in USD,Remarks
0,1,2020-01-09,BYJU’S,E-Tech,E-learning,Bengaluru,Tiger Global Management,Private Equity Round,200000000.0,
1,2,2020-01-13,Shuttl,Transportation,App based shuttle service,Gurgaon,Susquehanna Growth Equity,Series C,8048394.0,
2,3,2020-01-09,Mamaearth,E-commerce,Retailer of baby and toddler products,Bengaluru,Sequoia Capital India,Series B,18358860.0,
3,4,2020-01-02,https://www.wealthbucket.in/,FinTech,Online Investment,New Delhi,Vinod Khatumal,Pre-series A,3000000.0,
4,5,2020-01-02,Fashor,Fashion and Apparel,Embroiled Clothes For Women,Mumbai,Sprout Venture Partners,Seed Round,1800000.0,


<div class="alert alert-block alert-warning">
Stray whitespace in column names (for example the double space in 'City  Location') makes columns hard to reference by name and easy to mistype. Let's print the exact names and clean them up.
</div>

In [9]:
# Print each header through repr() so that otherwise invisible characters
# (extra spaces, tabs) show up inside the quotes.
for column_name in df.columns:
    print(f"{column_name!r}")

'Sr No'
'Date dd/mm/yyyy'
'Startup Name'
'Industry Vertical'
'SubVertical'
'City  Location'
'Investors Name'
'InvestmentnType'
'Amount in USD'
'Remarks'


In [10]:
# Normalise the raw headers first: trim surrounding whitespace and drop any
# embedded tab characters. '\t' is a literal tab, so no regex is needed.
df.columns = df.columns.str.strip().str.replace('\t', '', regex=False)

# Map the cleaned headers to space-free identifiers.
# - 'City  Location' contains two spaces in the source header, as the repr()
#   listing of df.columns shows, so the key must keep both.
# - 'InvestmentnType' is the header's own spelling; the rename drops the stray 'n'.
# - 'SubVertical' and 'Remarks' already have no spaces and are left as they are.
# rename() ignores keys that are not present, so re-running this cell after
# the columns have been renamed is a harmless no-op.
df = df.rename(columns={
    'Sr No': 'SrNo',
    'Date dd/mm/yyyy': 'Date_yyyy_mm_dd',
    'Startup Name': 'StartupName',
    'Industry Vertical': 'IndustryVertical',
    'City  Location': 'CityLocation',
    'Investors Name': 'InvestorsName',
    'InvestmentnType': 'InvestmentType',
    'Amount in USD': 'AmountinUSD',
})

df.head()

Unnamed: 0,SrNo,Date_yyyy_mm_dd,StartupName,IndustryVertical,SubVertical,CityLocation,InvestorsName,InvestmentType,AmountinUSD,Remarks
0,1,2020-01-09,BYJU’S,E-Tech,E-learning,Bengaluru,Tiger Global Management,Private Equity Round,200000000.0,
1,2,2020-01-13,Shuttl,Transportation,App based shuttle service,Gurgaon,Susquehanna Growth Equity,Series C,8048394.0,
2,3,2020-01-09,Mamaearth,E-commerce,Retailer of baby and toddler products,Bengaluru,Sequoia Capital India,Series B,18358860.0,
3,4,2020-01-02,https://www.wealthbucket.in/,FinTech,Online Investment,New Delhi,Vinod Khatumal,Pre-series A,3000000.0,
4,5,2020-01-02,Fashor,Fashion and Apparel,Embroiled Clothes For Women,Mumbai,Sprout Venture Partners,Seed Round,1800000.0,


In [11]:
# Write the cleaned frame to disk; index=False leaves the row index out of
# the file.
df.to_csv(path_or_buf='output_data.csv', index=False)