In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file under the Kaggle input directory,
# in the same order os.walk visits them, and echo each one.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/shark-tank-india-dataset/Shark Tank India Dataset.csv


# Combining Datasets : Concat and Append
Some of the most interesting studies of data come from combining different data sources.
These operations can involve anything from very straightforward concatenation of two different datasets, to more complicated database-style joins and merges that correctly handle any overlaps between the datasets.
``Series`` and ``DataFrame``s are built with this type of operation in mind, and Pandas includes functions and methods that make this sort of data wrangling fast and straightforward.

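Here we'll take a look at simple concatenation of ``Series`` and ``DataFrame``s with the ``pd.concat`` function and the ``DataFrame.append`` method (note that ``append`` was deprecated in pandas 1.4 and removed in pandas 2.0, where ``pd.concat`` is its replacement); later we'll dive into more sophisticated in-memory merges and joins implemented in Pandas.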


In [2]:
# Column data for the first frame: two Series sharing the row labels 'a'..'d'.
df1 = {
    'one': pd.Series([90, 80, 70, 60], index=list('abcd')),
    'two': pd.Series([10, 20, 30, 40], index=list('abcd')),
}

# Column data for the second frame: same column names, row labels 'e'..'h'.
df2 = {
    'one': pd.Series([10, 20, 30, 40], index=list('efgh')),
    'two': pd.Series([50, 60, 70, 80], index=list('efgh')),
}

In [3]:
# Turn the dict of Series into a DataFrame; the name df1 is rebound from the dict to the frame.
df1 = pd.DataFrame(data=df1)

In [4]:
# Turn the dict of Series into a DataFrame; the name df2 is rebound from the dict to the frame.
df2 = pd.DataFrame(data=df2)

In [5]:
# Display the first frame (row labels a-d, columns 'one' and 'two').
df1

Unnamed: 0,one,two
a,90,10
b,80,20
c,70,30
d,60,40


In [6]:
# Display the second frame (row labels e-h, columns 'one' and 'two').
df2

Unnamed: 0,one,two
e,10,50
f,20,60
g,30,70
h,40,80


In [7]:
# Stack the rows of df2 underneath df1 and display the combined frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# so df1.append(df2) raises AttributeError on current versions;
# pd.concat([df1, df2]) returns the same new frame and leaves df1/df2 untouched.
pd.concat([df1, df2])

Unnamed: 0,one,two
a,90,10
b,80,20
c,70,30
d,60,40
e,10,50
f,20,60
g,30,70
h,40,80


In [8]:
# Collect the frames to combine, in the order their rows should appear.
frames = [df1, df2]

In [9]:
# Display df1 again to confirm it is unchanged before concatenating.
df1

Unnamed: 0,one,two
a,90,10
b,80,20
c,70,30
d,60,40


In [10]:
# Display df2 again to confirm it is unchanged before concatenating.
df2

Unnamed: 0,one,two
e,10,50
f,20,60
g,30,70
h,40,80


In [11]:
# Concatenate every frame in the list row-wise in one call; the original
# row labels (a-d, then e-h) are kept in the result.
pd.concat(frames)

Unnamed: 0,one,two
a,90,10
b,80,20
c,70,30
d,60,40
e,10,50
f,20,60
g,30,70
h,40,80


## Performance: Which is faster, pandas concat or append?
For a single call, both are almost equally fast.
However, there can be a noticeable difference depending on the data and on how many times the operation is repeated.<br>
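* The append function is typically called once for every dataframe (or row) that has to be added to the first dataframe, and each call copies all of the data accumulated so far. The concat function takes the whole list of dataframes and does a single operation to finish the job, which makes it faster than repeated calls to append().<br>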
* As every append call copies the data, the append operation is fine if the dataframes are small and only a few appends are needed, because only a few copies will be made.<br>
* The append function creates a new resulting dataframe instead of modifying the existing one. Because of this allocation and copying on every call, the performance of repeated append operations is lower than that of a single concat() call.

**However, append() is fine if only a very few append operations are needed. If multiple append operations are needed, it is better to use concat().**

# 1. Append rows of another dataframe
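You can append another dataframe’s rows to the end of a dataframe. Pass the dataframe whose rows you want to append as an argument to the `append()` method (pandas versions before 2.0), or pass both dataframes in a list to `pd.concat()`, which gives the same result and also works on pandas 2.0 and later.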

In [12]:
import pandas as pd

# Example: add the rows of one dataframe to the end of another with identical columns.

# create a sample dataframe
data1 = {
    'Name': ['Microsoft Corporation', 'Google, LLC', 'Tesla, Inc.'],
    'Symbol': ['MSFT', 'GOOG', 'TSLA'],
    'Shares': [100, 50, 150]
}
df1 = pd.DataFrame(data1)
# print the original dataframe
print("The original dataframe:\n")
print(df1)

# The dataframe to append
# NOTE(review): Apple's ticker is 'AAPL'; 'APPL' is kept as-is so the recorded
# output of this notebook still matches the code.
data2 = {
    'Name':['Apple Inc.', 'Netflix, Inc.'],
    'Symbol':['APPL', 'NFLX'],
    'Shares': [200, 80]
}
df2 = pd.DataFrame(data2)
# print the dataframe to append
print("\nThe dataframe to append:\n")
print(df2)

# Append rows
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so
# df1.append(df2) fails on current versions. pd.concat([df1, df2]) is the
# drop-in replacement: it returns a new frame and, like append, keeps each
# input's own index labels (so the result index is 0, 1, 2, 0, 1).
df3 = pd.concat([df1, df2])
print("\nThe appended dataframe:\n")
print(df3)

The original dataframe:

                    Name Symbol  Shares
0  Microsoft Corporation   MSFT     100
1            Google, LLC   GOOG      50
2            Tesla, Inc.   TSLA     150

The dataframe to append:

            Name Symbol  Shares
0     Apple Inc.   APPL     200
1  Netflix, Inc.   NFLX      80

The appended dataframe:

                    Name Symbol  Shares
0  Microsoft Corporation   MSFT     100
1            Google, LLC   GOOG      50
2            Tesla, Inc.   TSLA     150
0             Apple Inc.   APPL     200
1          Netflix, Inc.   NFLX      80


In [13]:
# Same row-wise combination, but ignore_index=True discards the inputs' index
# labels and renumbers the result 0..n-1.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
print(pd.concat([df1, df2], ignore_index=True))

                    Name Symbol  Shares
0  Microsoft Corporation   MSFT     100
1            Google, LLC   GOOG      50
2            Tesla, Inc.   TSLA     150
3             Apple Inc.   APPL     200
4          Netflix, Inc.   NFLX      80


# 2. Append rows with a mismatch in columns
Columns that are not present in the original dataframe (the one on which the append function is applied) are added as new columns, and the rows that came from the original dataframe are filled with `NaN` in those columns. See the example below:

In [14]:
import pandas as pd

# Example: add rows from a dataframe that has an extra column the first one lacks.

# create a sample dataframe
data1 = {
    'Name': ['Microsoft Corporation', 'Google, LLC', 'Tesla, Inc.'],
    'Symbol': ['MSFT', 'GOOG', 'TSLA'],
    'Shares': [100, 50, 150]
}
df1 = pd.DataFrame(data1)
# print the original dataframe
print("The original dataframe:\n")
print(df1)

# The dataframe to append
# NOTE(review): Apple's ticker is 'AAPL'; 'APPL' is kept as-is so the recorded
# output of this notebook still matches the code.
data2 = {
    'Name':['Apple Inc.', 'Netflix, Inc.'],
    'Symbol':['APPL', 'NFLX'],
    'Shares': [200, 80],
    'Market Cap($B)': ['2030', '237']
}
df2 = pd.DataFrame(data2)
# print the dataframe to append
print("\nThe dataframe to append:\n")
print(df2)

# Append rows
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so
# df1.append(df2) fails on current versions. pd.concat uses an outer join on
# the columns by default, so 'Market Cap($B)' is added as a new column and the
# rows coming from df1 get NaN there, exactly as append used to do.
df3 = pd.concat([df1, df2])
print("\nThe appended dataframe:\n")
print(df3)

The original dataframe:

                    Name Symbol  Shares
0  Microsoft Corporation   MSFT     100
1            Google, LLC   GOOG      50
2            Tesla, Inc.   TSLA     150

The dataframe to append:

            Name Symbol  Shares Market Cap($B)
0     Apple Inc.   APPL     200           2030
1  Netflix, Inc.   NFLX      80            237

The appended dataframe:

                    Name Symbol  Shares Market Cap($B)
0  Microsoft Corporation   MSFT     100            NaN
1            Google, LLC   GOOG      50            NaN
2            Tesla, Inc.   TSLA     150            NaN
0             Apple Inc.   APPL     200           2030
1          Netflix, Inc.   NFLX      80            237
