In [1]:
#Pandas : for working with tables/spreadsheets
import pandas as pd

In [2]:
#Numpy : for math operations and handling missing values
import numpy as np

In [None]:
#load the data

In [5]:
df = pd.read_csv(r"C:\Users\Admin03\Desktop\Startup-funding-Analysis\data\Unicorn_Companies.csv")
# 'df' is now your entire dataset in memory
# df = DataFrame (standard variable name everyone uses)

# Why use read_csv()?
# - Loads CSV into a table format you can work with
# - Much faster than Excel for large files
# - Can handle millions of rows

In [None]:
# INSPECT THE DATA (Get to know what you have)

In [4]:
# 1. How many rows and columns?
print("Dataset Shape(rows, columns):", df.shape)

Dataset Shape(rows, columns): (1074, 10)


In [5]:
# 2. See first 5 rows (like scrolling to top in Excel)
print("\nFirst 5 rows:")
print(df.head())


First 5 rows:
     Company Valuation Date_Joined                         Industry  \
0  Bytedance     $180B  07-04-2017          Artificial intelligence   
1     SpaceX     $100B  01-12-2012                            Other   
2      SHEIN     $100B  03-07-2018  E-commerce & direct-to-consumer   
3     Stripe      $95B  23-01-2014                          Fintech   
4     Klarna      $46B  12-12-2011                          Fintech   

            City        Country      Continent  Year_Founded Funding  \
0        Beijing          China           Asia          2012     $8B   
1      Hawthorne  United States  North America          2002     $7B   
2       Shenzhen          China           Asia          2008     $2B   
3  San Francisco  United States  North America          2010     $2B   
4      Stockholm         Sweden         Europe          2005     $4B   

                                    Select Investors  
0  Sequoia Capital China, SIG Asia Investments, S...  
1  Founders Fun

In [6]:
# 3. See last 5 rows (check the end)
print("\nLast 5 rows:")
print(df.tail())


Last 5 rows:
          Company Valuation Date_Joined                         Industry  \
1069     Zhaogang       $1B  29-06-2017  E-commerce & direct-to-consumer   
1070  Zhuan Zhuan       $1B  18-04-2017  E-commerce & direct-to-consumer   
1071     Zihaiguo       $1B  06-05-2021                Consumer & retail   
1072         Zopa       $1B  19-10-2021                          Fintech   
1073        Zwift       $1B  16-09-2020  E-commerce & direct-to-consumer   

            City         Country      Continent  Year_Founded Funding  \
1069    Shanghai           China           Asia          2012   $379M   
1070     Beijing           China           Asia          2015   $990M   
1071   Chongqing           China           Asia          2018    $80M   
1072      London  United Kingdom         Europe          2005   $792M   
1073  Long Beach   United States  North America          2014   $620M   

                                       Select Investors  
1069    K2 Ventures, Matrix Part

In [7]:
# 4. See random 5 rows (get a feel for variety)
print("\nRandom sample:")
print(df.sample(5))


Random sample:
        Company Valuation Date_Joined                         Industry  \
778  MindTickle       $1B  03-08-2021     Internet software & services   
160  OfBusiness       $5B  31-07-2021  E-commerce & direct-to-consumer   
584      OCSiAl       $2B  04-03-2019                            Other   
521     Aledade       $2B  19-01-2021                           Health   
258       BYTON       $3B  20-04-2018            Auto & transportation   

              City        Country      Continent  Year_Founded Funding  \
778  San Francisco  United States  North America          2012   $281M   
160        Gurgaon          India           Asia          2015   $734M   
584     Leudelange     Luxembourg         Europe          2012   $285M   
521       Bethesda  United States  North America          2014   $295M   
258        Nanjing          China           Asia          2016     $1B   

                                      Select Investors  
778          Qualcomm Ventures, Accel

In [None]:
# Why inspect first?
# - Understand what you're working with
# - Spot obvious issues early
# - See column names and data types

In [None]:
# CHECK DATA TYPES
# Common types:
# - object = text (like Excel "Text")
# - int64 = whole numbers (like Excel "Number")
# - float64 = decimals (like Excel "Number" with decimals)
# - datetime64 = dates (like Excel "Date")

In [8]:
# See what TYPE each column is
print("\nColumn Data Types:")
print(df.dtypes)


Column Data Types:
Company             object
Valuation           object
Date_Joined         object
Industry            object
City                object
Country             object
Continent           object
Year_Founded         int64
Funding             object
Select Investors    object
dtype: object


In [None]:
# Why check types?
# - "object" for numbers means it's stored as text
# - Can't do math on text!
# - Need to convert text ‚Üí numbers

In [None]:
# CHECK FOR MISSING VALUES
# Why check nulls?
# - Need to know what's missing
# - Decide how to handle (fill? remove? ignore?)
# - Missing data can break calculations

In [9]:
# Count how many nulls/blanks in each column
print("\nMissing value per column:")
print(df.isnull().sum())


Missing value per column:
Company              0
Valuation            0
Date_Joined          0
Industry             0
City                16
Country              0
Continent            0
Year_Founded         0
Funding              0
Select Investors     1
dtype: int64


In [None]:
# How it works:
# - df.isnull() ‚Üí Returns True/False for each cell (True = missing)
# - .sum() ‚Üí Counts the True values per column

In [None]:
# GET BASIC STATISTICS

In [10]:
# Quick statistical summary
print("\nBasic statistics:")
print(df.describe())


Basic statistics:
       Year_Founded
count   1074.000000
mean    2012.895717
std        5.698573
min     1919.000000
25%     2011.000000
50%     2014.000000
75%     2016.000000
max     2021.000000


In [None]:
# Shows for each numeric column:
# - count: how many non-null values
# - mean: average
# - std: standard deviation
# - min: smallest value
# - 25%: first quartile
# - 50%: median
# - 75%: third quartile
# - max: largest value

# Why use describe()?
# - Quick overview of your numbers
# - Spot outliers (min/max way off?)
# - See if data makes sense

In [None]:
# PROBLEM: Funding column has text, not numbers

In [6]:
def clean_funding(value):
    """
    Converts funding text to numeric millions.
    
    Parameters:
    -----------
    value : str
        The funding value (e.g., "$8B", "$500M")
    
    Returns:
    --------
    float
        The funding in millions, or NaN if unknown
        
    Examples:
    ---------
    "$8B" ‚Üí 8000.0
    "$500M" ‚Üí 500.0
    "Unknown" ‚Üí NaN
    """
    
    # Step 1: Check if value is missing or "Unknown"
    if pd.isna(value) or value == 'Unknown':
        return np.nan  # Return NaN (Not a Number = missing)
    
    # Step 2: Convert to string (in case it's not)
    value = str(value)
    
    # Step 3: Remove $ sign and commas
    # "$8,000B" ‚Üí "8000B"
    value = value.replace('$', '').replace(',', '')
    
    # Step 4: Check if it's in Billions or Millions
    if 'B' in value:
        # Remove 'B' and convert to number
        number = float(value.replace('B', ''))
        # Convert billions to millions (multiply by 1000)
        return number * 1000
        # Example: "8B" ‚Üí 8 ‚Üí 8 * 1000 = 8000 millions
    
    elif 'M' in value:
        # Remove 'M' and convert to number
        number = float(value.replace('M', ''))
        # Already in millions, return as-is
        return number
        # Example: "500M" ‚Üí 500 millions
    
    else:
        # If neither B nor M, something's wrong
        return np.nan

# APPLY THE FUNCTION TO ALL ROWS

# Create a NEW column with cleaned values
df['Funding_Millions'] = df['Funding'].apply(clean_funding)

In [None]:
# VERIFY IT WORKED

In [17]:
#Compare original vs cleaned
print("\nFunding Cleaning Results:")
print(df[['Company', 'Funding', 'Funding_Millions']].head(20))


Funding Cleaning Results:
         Company Funding  Funding_Millions
0      Bytedance     $8B            8000.0
1         SpaceX     $7B            7000.0
2          SHEIN     $2B            2000.0
3         Stripe     $2B            2000.0
4         Klarna     $4B            4000.0
5          Canva   $572M             572.0
6   Checkout.com     $2B            2000.0
7      Instacart     $3B            3000.0
8      JUUL Labs    $14B           14000.0
9     Databricks     $3B            3000.0
10       Revolut     $2B            2000.0
11    Epic Games     $7B            7000.0
12           FTX     $2B            2000.0
13      Fanatics     $4B            4000.0
14         Chime     $2B            2000.0
15        BYJU's     $4B            4000.0
16   J&T Express     $5B            5000.0
17   Xiaohongshu   $918M             918.0
18          Miro   $476M             476.0
19     Yuanfudao     $4B            4000.0


In [18]:
# Check how many were successfully converted
successful = df['Funding_Millions'].notna().sum()
total = len(df)
print(f"\nSuccessfully converted: {successful}/{total} ({successful/total*100:.1f}%)")


Successfully converted: 1062/1074 (98.9%)


In [19]:
# Check for any unexpected values
print("\nMin funding:", df['Funding_Millions'].min())
print("Max funding:", df['Funding_Millions'].max())
print("Mean funding:", df['Funding_Millions'].mean())


Min funding: 0.0
Max funding: 14000.0
Mean funding: 557.2693032015065


In [None]:
# Clean the Valuation Column

In [24]:
def clean_valuation(value):
    """
    Converts valuation text to numeric billions.
    
    "$180B" ‚Üí 180.0
    "$2B" ‚Üí 2.0
    """
    
    # Check if missing
    if pd.isna(value):
        return np.nan
    
    # Remove $ and commas
    value = str(value).replace('$', '').replace(',', '')
    
    # Extract number
    if 'B' in value:
        number = float(value.replace('B', ''))
        # Keep in billions (don't multiply)
        return number
    
    else:
        return np.nan

# Apply to create new column
df['Valuation_Billions'] = df['Valuation'].apply(clean_valuation)

# Verify
print("\nValuation Cleaning Results:")
print(df[['Company', 'Valuation', 'Valuation_Billions']].head(10))


Valuation Cleaning Results:
        Company Valuation  Valuation_Billions
0     Bytedance     $180B               180.0
1        SpaceX     $100B               100.0
2         SHEIN     $100B               100.0
3        Stripe      $95B                95.0
4        Klarna      $46B                46.0
5         Canva      $40B                40.0
6  Checkout.com      $40B                40.0
7     Instacart      $39B                39.0
8     JUUL Labs      $38B                38.0
9    Databricks      $38B                38.0


In [None]:
#Fix the Date Column
# PROBLEM: Date is stored as TEXT

In [27]:

# CLEAN COLUMN NAMES (VERY IMPORTANT)

df.columns = (
    df.columns
    .str.strip()          # remove extra spaces
    .str.replace(' ', '_')
    .str.lower()
)

print("Columns after cleaning:")
print(df.columns)

Columns after cleaning:
Index(['company', 'valuation', 'date_joined', 'industry', 'city', 'country',
       'continent', 'year_founded', 'funding', 'select_investors',
       'year_joined'],
      dtype='object')


In [28]:
# CONVERT DATE COLUMN TO DATETIME

df['date_joined'] = pd.to_datetime(
    df['date_joined'],    # safe now
    format='%d-%m-%Y',
    errors='coerce'
)

# EXTRACT YEAR

df['year_joined'] = df['date_joined'].dt.year

# VERIFY RESULTS


print("\nDate Conversion Results:")
print(df[['company', 'date_joined', 'year_joined']].head())


Date Conversion Results:
     company date_joined  year_joined
0  Bytedance  2017-04-07         2017
1     SpaceX  2012-12-01         2012
2      SHEIN  2018-07-03         2018
3     Stripe  2014-01-23         2014
4     Klarna  2011-12-12         2011


In [29]:
# Check failed conversions
failed = df['date_joined'].isna().sum()
print(f"\nFailed conversions: {failed}")

# EXAMPLE ANALYSIS

# Unicorn count by year
unicorns_per_year = df['year_joined'].value_counts().sort_index()
print("\nUnicorns per year:")
print(unicorns_per_year)


Failed conversions: 0

Unicorns per year:
year_joined
2007      1
2011      2
2012      4
2013      3
2014     13
2015     35
2016     21
2017     44
2018    103
2019    104
2020    108
2021    520
2022    116
Name: count, dtype: int64


In [None]:
#Create Calculated Columns

In [22]:
# CELL 5 - MINIMAL VERSION 
# 1. Years to unicorn (simple)
# Now create Year_Joined (auto-detect date format)
try:
    df['Year_Joined'] = pd.to_datetime(df['Date_Joined'], dayfirst=True).dt.year
    print("Created Year_Joined")
except Exception as e:
    print(f"Error creating Year_Joined: {e}")

‚úÖ Created Year_Joined


In [10]:
# 2. Funding in billions (simple)
try:
    df['Funding_Billions'] = df['Funding_Millions'] / 1000
    print("Created Funding_Billions")
except Exception as e:
    print(f"Error with Funding_Billions: {e}")

Created Funding_Billions


In [11]:
# 3. Skip efficiency ratio for now (optional)
print("Skipping Efficiency_Ratio (can add later)")

Skipping Efficiency_Ratio (can add later)


In [20]:
# 4. Valuation tier (simple)
try:
    # Convert Valuation text to numeric billions
    df['Valuation_Billions'] = (
        df['Valuation']
        .str.replace('$', '', regex=False)
        .str.replace('B', '', regex=False)
        .astype(float)
    )
    print(" Created Valuation_Billions")
except Exception as e:
    print(f"Error creating Valuation_Billions: {e}")


‚úÖ Created Valuation_Billions


In [13]:
# Show what worked
print("\nColumns created:")
print(df.columns.tolist())


Columns created:
['Company', 'Valuation', 'Date_Joined', 'Industry', 'City', 'Country', 'Continent', 'Year_Founded', 'Funding', 'Select Investors', 'Funding_Millions', 'Funding_Billions']


In [14]:
# Check current missing values
print("Missing values after cleaning:")
print(df.isnull().sum())

Missing values after cleaning:
Company              0
Valuation            0
Date_Joined          0
Industry             0
City                16
Country              0
Continent            0
Year_Founded         0
Funding              0
Select Investors     1
Funding_Millions    12
Funding_Billions    12
dtype: int64


In [15]:
# Are any companies listed twice?
duplicates = df.duplicated(subset=['Company']).sum()
print(f"\nDuplicate companies: {duplicates}")


Duplicate companies: 1


In [16]:
# If duplicates found, remove them
if duplicates > 0:
    print("Removing duplicates...")
    df = df.drop_duplicates(subset=['Company'], keep='first')
    print(f"Kept first occurrence, removed {duplicates} duplicates")

Removing duplicates...
Kept first occurrence, removed 1 duplicates


In [24]:
#CREATE ALL CALCULATED COLUMNS

# 1. Years to unicorn
try:
    df['Years_To_Unicorn'] = df['Year_Joined'] - df['Year_Founded']
    print("Created Years_To_Unicorn")
except Exception as e:
    print(f"Error with Years_To_Unicorn: {e}")

# 2. Valuation tier
try:
    df['Valuation_Tier'] = pd.cut(
        df['Valuation_Billions'], 
        bins=[0, 2, 5, 10, 200], 
        labels=['Entry ($1-2B)', 'Mid ($2-5B)', 'Super ($5-10B)', 'Mega ($10B+)']
    )
    print("Created Valuation_Tier")
except Exception as e:
    print(f"Error with Valuation_Tier: {e}")

# 3. Efficiency ratio
try:
    df['Efficiency_Ratio'] = df['Valuation_Billions'] / (df['Funding_Billions'] + 0.001)
    print("Created Efficiency_Ratio")
except Exception as e:
    print(f"Error with Efficiency_Ratio: {e}")

# Show what worked
print("\n All columns:")
print(df.columns.tolist())

‚úÖ Created Years_To_Unicorn
‚úÖ Created Valuation_Tier
‚úÖ Created Efficiency_Ratio

‚úÖ All columns:
['Company', 'Valuation', 'Date_Joined', 'Industry', 'City', 'Country', 'Continent', 'Year_Founded', 'Funding', 'Select Investors', 'Funding_Millions', 'Funding_Billions', 'Valuation_Billions', 'Year_Joined', 'Years_To_Unicorn', 'Valuation_Tier', 'Efficiency_Ratio']


In [25]:
# ============================================
# CREATE A DATA QUALITY REPORT
# ============================================

print("=" * 70)
print(" " * 20 + "DATA CLEANING SUMMARY")
print("=" * 70)

print("\nüìä ORIGINAL DATA:")
print(f"   ‚Ä¢ Total companies: 1,074")
print(f"   ‚Ä¢ Original columns: 10")

print("\nüîß CLEANING PERFORMED:")
print(f"   ‚úÖ Converted Funding: text ‚Üí numeric (millions)")
print(f"   ‚úÖ Converted Valuation: text ‚Üí numeric (billions)")
print(f"   ‚úÖ Fixed Date_Joined: text ‚Üí datetime")
print(f"   ‚úÖ Created 5 calculated columns")
print(f"   ‚úÖ Removed {duplicates if duplicates > 0 else 0} duplicates")

print("\nüìà FINAL DATASET:")
print(f"   ‚Ä¢ Total rows: {len(df)}")
print(f"   ‚Ä¢ Total columns: {len(df.columns)}")
print(f"   ‚Ä¢ Complete rows (no NaN): {len(df.dropna())}")

print("\nüìÅ NEW COLUMNS CREATED:")
new_cols = ['Funding_Millions', 'Valuation_Billions', 'Date_Joined', 
            'Year_Joined', 'Years_To_Unicorn', 'Funding_Billions', 
            'Efficiency_Ratio', 'Valuation_Tier']
for col in new_cols:
    print(f"   ‚Ä¢ {col}")

print("\nüìä KEY STATISTICS:")
print(f"   ‚Ä¢ Average funding: ${df['Funding_Millions'].mean():.0f}M")
print(f"   ‚Ä¢ Average valuation: ${df['Valuation_Billions'].mean():.1f}B")
print(f"   ‚Ä¢ Average years to unicorn: {df['Years_To_Unicorn'].mean():.1f}")
print(f"   ‚Ä¢ Companies with known funding: {df['Funding_Millions'].notna().sum()}")

print("\n" + "=" * 70)

                    DATA CLEANING SUMMARY

üìä ORIGINAL DATA:
   ‚Ä¢ Total companies: 1,074
   ‚Ä¢ Original columns: 10

üîß CLEANING PERFORMED:
   ‚úÖ Converted Funding: text ‚Üí numeric (millions)
   ‚úÖ Converted Valuation: text ‚Üí numeric (billions)
   ‚úÖ Fixed Date_Joined: text ‚Üí datetime
   ‚úÖ Created 5 calculated columns
   ‚úÖ Removed 1 duplicates

üìà FINAL DATASET:
   ‚Ä¢ Total rows: 1073
   ‚Ä¢ Total columns: 17
   ‚Ä¢ Complete rows (no NaN): 1044

üìÅ NEW COLUMNS CREATED:
   ‚Ä¢ Funding_Millions
   ‚Ä¢ Valuation_Billions
   ‚Ä¢ Date_Joined
   ‚Ä¢ Year_Joined
   ‚Ä¢ Years_To_Unicorn
   ‚Ä¢ Funding_Billions
   ‚Ä¢ Efficiency_Ratio
   ‚Ä¢ Valuation_Tier

üìä KEY STATISTICS:
   ‚Ä¢ Average funding: $557M
   ‚Ä¢ Average valuation: $3.4B
   ‚Ä¢ Average years to unicorn: 7.0
   ‚Ä¢ Companies with known funding: 1061



In [29]:
test_df = pd.read_csv(output_file)

In [31]:
print("\nFirst 3 rows of saved file:")
print(test_df.head(3))


First 3 rows of saved file:
     Company Valuation Date_Joined                         Industry  \
0  Bytedance     $180B  07-04-2017          Artificial intelligence   
1     SpaceX     $100B  01-12-2012                            Other   
2      SHEIN     $100B  03-07-2018  E-commerce & direct-to-consumer   

        City        Country      Continent  Year_Founded Funding  \
0    Beijing          China           Asia          2012     $8B   
1  Hawthorne  United States  North America          2002     $7B   
2   Shenzhen          China           Asia          2008     $2B   

                                    Select Investors  Funding_Millions  \
0  Sequoia Capital China, SIG Asia Investments, S...            8000.0   
1  Founders Fund, Draper Fisher Jurvetson, Rothen...            7000.0   
2  Tiger Global Management, Sequoia Capital China...            2000.0   

   Funding_Billions  Valuation_Billions  Year_Joined  Years_To_Unicorn  \
0               8.0               180.0   

In [32]:
df.head(10)

Unnamed: 0,Company,Valuation,Date_Joined,Industry,City,Country,Continent,Year_Founded,Funding,Select Investors,Funding_Millions,Funding_Billions,Valuation_Billions,Year_Joined,Years_To_Unicorn,Valuation_Tier,Efficiency_Ratio
0,Bytedance,$180B,07-04-2017,Artificial intelligence,Beijing,China,Asia,2012,$8B,"Sequoia Capital China, SIG Asia Investments, S...",8000.0,8.0,180.0,2017,5,Mega ($10B+),22.497188
1,SpaceX,$100B,01-12-2012,Other,Hawthorne,United States,North America,2002,$7B,"Founders Fund, Draper Fisher Jurvetson, Rothen...",7000.0,7.0,100.0,2012,10,Mega ($10B+),14.283674
2,SHEIN,$100B,03-07-2018,E-commerce & direct-to-consumer,Shenzhen,China,Asia,2008,$2B,"Tiger Global Management, Sequoia Capital China...",2000.0,2.0,100.0,2018,10,Mega ($10B+),49.975012
3,Stripe,$95B,23-01-2014,Fintech,San Francisco,United States,North America,2010,$2B,"Khosla Ventures, LowercaseCapital, capitalG",2000.0,2.0,95.0,2014,4,Mega ($10B+),47.476262
4,Klarna,$46B,12-12-2011,Fintech,Stockholm,Sweden,Europe,2005,$4B,"Institutional Venture Partners, Sequoia Capita...",4000.0,4.0,46.0,2011,6,Mega ($10B+),11.497126
5,Canva,$40B,08-01-2018,Internet software & services,Surry Hills,Australia,Oceania,2012,$572M,"Sequoia Capital China, Blackbird Ventures, Mat...",572.0,0.572,40.0,2018,6,Mega ($10B+),69.808028
6,Checkout.com,$40B,02-05-2019,Fintech,London,United Kingdom,Europe,2012,$2B,"Tiger Global Management, Insight Partners, DST...",2000.0,2.0,40.0,2019,7,Mega ($10B+),19.990005
7,Instacart,$39B,30-12-2014,"Supply chain, logistics, & delivery",San Francisco,United States,North America,2012,$3B,"Khosla Ventures, Kleiner Perkins Caufield & By...",3000.0,3.0,39.0,2014,2,Mega ($10B+),12.995668
8,JUUL Labs,$38B,20-12-2017,Consumer & retail,San Francisco,United States,North America,2015,$14B,Tiger Global Management,14000.0,14.0,38.0,2017,2,Mega ($10B+),2.714092
9,Databricks,$38B,05-02-2019,Data management & analytics,San Francisco,United States,North America,2013,$3B,"Andreessen Horowitz, New Enterprise Associates...",3000.0,3.0,38.0,2019,6,Mega ($10B+),12.662446


In [35]:
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl

   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openp


[notice] A new release of pip is available: 25.2 -> 26.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [36]:
# Save as Excel xlsx
output_file = 'cleaned_unicorns.xlsx'
df.to_excel(output_file, index=False)
print(f"Saved as Excel file: {output_file}")

‚úÖ Saved as Excel file: cleaned_unicorns.xlsx


In [37]:
df.to_csv('cleaned_unicorns.csv', index=False, encoding='utf-8-sig')