In [10]:
# Several Series can be joined along one axis (column-wise or row-wise);
# axis=1 below places them side by side as columns.
import pandas as pd

# Three unnamed Series, each with the default 0..2 index
courses = pd.Series(["Spark", "PySpark", "Hadoop"])
fees = pd.Series([22000, 25000, 23000])
discount = pd.Series([1000, 2300, 1000])

# Two Series side by side -- unnamed Series get the column labels 0 and 1
pair = [courses, fees]
df = pd.concat(pair, axis=1)
print("Concat 2 lists ...\n", df)

# Three Series side by side -- column labels 0, 1 and 2
triple = pair + [discount]
df = pd.concat(triple, axis=1)
print("\nConcat 3 lists ...\n", df)



Concat 2 lists ...
          0      1
0    Spark  22000
1  PySpark  25000
2   Hadoop  23000

Concat 3 lists ...
          0      1     2
0    Spark  22000  1000
1  PySpark  25000  2300
2   Hadoop  23000  1000


In [12]:
# Give every Series a name so that concat uses those names as column labels
import pandas as pd

courses = pd.Series(["Spark", "PySpark", "Hadoop"], name="courses")
fees = pd.Series([22000, 25000, 23000], name="fees")
discount = pd.Series([1000, 2300, 1000], name="discount")

# axis=1 -> one column per Series, labelled with that Series' name
named_series = [courses, fees, discount]
df = pd.concat(named_series, axis=1)
print(df)




   courses   fees  discount
0    Spark  22000      1000
1  PySpark  25000      2300
2   Hadoop  23000      1000
    Courses  Course_Fee  Course_Discount
r1    Spark       22000             1000
r2  PySpark       25000             2300
r3   Hadoop       23000             1000


In [15]:
# Build Series that carry custom row labels, then rename the columns while concatenating
import pandas as pd

# Row labels shared by all three Series
index_labels = ['r1', 'r2', 'r3']

# The index is supplied at construction time
courses = pd.Series(["Spark", "PySpark", "Hadoop"], index=index_labels, name='courses')
fees = pd.Series([22000, 25000, 23000], index=index_labels, name='fees')
discount = pd.Series([1000, 2300, 1000], index=index_labels, name='discount')

# When a dict is passed to concat, its keys (not the Series names) become the column labels
renamed_columns = {
    'Courses': courses,
    'Course_Fee': fees,
    'Course_Discount': discount,
}
df = pd.concat(renamed_columns, axis=1)
print(df)

    Courses  Course_Fee  Course_Discount
r1    Spark       22000             1000
r2  PySpark       25000             2300
r3   Hadoop       23000             1000


In [17]:
# Same custom-index / custom-column-name concat as before, followed by
# reset_index() to turn the row labels into an ordinary column
import pandas as pd

courses = pd.Series(["Spark", "PySpark", "Hadoop"], name='courses')
fees = pd.Series([22000, 25000, 23000], name='fees')
discount = pd.Series([1000, 2300, 1000], name='discount')

# Attach the same row labels to every Series
index_labels = ['r1', 'r2', 'r3']
for labelled in (courses, fees, discount):
    labelled.index = index_labels

# Dict keys become the column labels; reset_index() then moves r1..r3 into
# a column called 'index' and installs a fresh 0..2 index
df = pd.concat(
    {'Courses': courses, 'Course_Fee': fees, 'Course_Discount': discount},
    axis=1,
).reset_index()

print(df)

  index  Courses  Course_Fee  Course_Discount
0    r1    Spark       22000             1000
1    r2  PySpark       25000             2300
2    r3   Hadoop       23000             1000


In [18]:
# Join two named Series into a DataFrame with pd.merge()
import pandas as pd

courses = pd.Series(["Spark", "PySpark", "Hadoop"], name='courses')
fees = pd.Series([22000, 25000, 23000], name='fees')

# Join on the index of both sides; the Series names become the column labels
join_on_index = {'left_index': True, 'right_index': True}
df = pd.merge(courses, fees, **join_on_index)
print(df)

   courses   fees
0    Spark  22000
1  PySpark  25000
2   Hadoop  23000


In [19]:
# Stack two Series end to end into a single-column DataFrame.
# Series.append() was deprecated in pandas 1.4 and removed in pandas 2.0,
# so pd.concat() is used here; it yields the same rows.
import pandas as pd

courses_am = pd.Series(["Spark", "PySpark", "Hadoop"])
courses_pm = pd.Series(["Pandas", "Python", "Scala"])

# ignore_index=True renumbers the rows 0..5 instead of repeating 0..2 twice
all_courses = pd.concat([courses_am, courses_pm], ignore_index=True)

# to_frame(name=...) labels the single column 'all_courses'
df = all_courses.to_frame(name='all_courses')
print(df)

  all_courses
0       Spark
1     PySpark
2      Hadoop
3      Pandas
4      Python
5       Scala


  df = pd.DataFrame(courses_am.append(courses_pm,


In [21]:
# Row-wise concat(): stack two Series into one, then wrap the result in a DataFrame
import pandas as pd

courses_am = pd.Series(["Spark", "PySpark", "Hadoop"])
courses_pm = pd.Series(["Pandas", "Python", "Scala"])

# ignore_index=True gives the stacked Series a fresh 0..5 index;
# the Series is unnamed, so the resulting column is labelled 0
stacked = pd.concat([courses_am, courses_pm], ignore_index=True)
df = pd.DataFrame(stacked)

print(df)

         0
0    Spark
1  PySpark
2   Hadoop
3   Pandas
4   Python
5    Scala


In [22]:
# importing Pandas library
import pandas as pd

# Nested list of [country, year, population, continent] records
age_list = [['Afghanistan', 1952, 8425333, 'Asia'],
            ['Australia', 1957, 9712569, 'Oceania'],
            ['Brazil', 1962, 76039390, 'Americas'],
            ['China', 1957, 637408000, 'Asia'],
            ['France', 1957, 44310863, 'Europe'],
            ['India', 1952, 3.72e+08, 'Asia'],
            ['South Africa', 1966, 0.0, 'Africa'],
            ['United States', 1957, 171984000, 'Americas']]

# creating a Pandas DataFrame
df = pd.DataFrame(age_list, columns=['Country', 'Year',
                                     'Population', 'Continent'])
print("Original DataFrame ...\n", df)

# Each example re-sorts the result of the previous one. sort_values() returns
# a new DataFrame, so the result is rebound to `df` instead of using inplace=True.

#### ASCENDING EXAMPLE ####
# Sorting the DataFrame in ascending order (the default) by column 'Continent'
df = df.sort_values(by=['Continent'])
# print("\nDF sorted by Continent ...\n", df)


#### DESCENDING EXAMPLE ####
# Sorting the DataFrame in descending order by column 'Country'
df = df.sort_values(by=['Country'], ascending=False)
# print("\nDF sorted by Country descending ...\n", df)


#### MISSING VALUES EXAMPLE ####
# Sorting by column 'Population' with missing values placed first.
# Note: this sample data has no missing values (South Africa's 0.0 is a real
# number, not NaN), so na_position makes no visible difference here.
df = df.sort_values(by=['Population'], na_position='first')
# print("\nDF sorted by missing values first ...\n", df)


#### MULTI COLUMN SORT EXAMPLE ####
# Sorting by multiple columns -- 'Continent' first, then 'Country' within each continent
df = df.sort_values(by=['Continent', 'Country'])
# print("\nDF sorting multiple columns ...\n", df)


#### EXAMPLE SORT MULTI COLUMNS IN DIFFERENT ORDER ####
# Sorting by multiple columns with a different direction per column:
# 'Country' descending, then 'Continent' ascending.
# `ascending` must have exactly one entry per column listed in `by`.
df = df.sort_values(by=['Country', 'Continent'],
                    ascending=[False, True])
print("\nDF sorting multiple columns in different order ...\n", df)

Original DataFrame ...
          Country  Year   Population Continent
0    Afghanistan  1952    8425333.0      Asia
1      Australia  1957    9712569.0   Oceania
2         Brazil  1962   76039390.0  Americas
3          China  1957  637408000.0      Asia
4         France  1957   44310863.0    Europe
5          India  1952  372000000.0      Asia
6   South Africa  1966          0.0    Africa
7  United States  1957  171984000.0  Americas

DF sorting multiple columns in different order ...
          Country  Year   Population Continent
7  United States  1957  171984000.0  Americas
6   South Africa  1966          0.0    Africa
5          India  1952  372000000.0      Asia
4         France  1957   44310863.0    Europe
3          China  1957  637408000.0      Asia
2         Brazil  1962   76039390.0  Americas
1      Australia  1957    9712569.0   Oceania
0    Afghanistan  1952    8425333.0      Asia


In [28]:
# importing Pandas library
import pandas as pd

# NOTE(review): the "ValueError: Length of ascending (2) != length of by (1)"
# recorded under this cell does not match the code shown here, where `by` and
# `ascending` both have two entries -- presumably it is left over from an
# earlier run in which `by` listed only one column. Re-run the cell to refresh it.

# Nested list of [country, year, population, continent] records
age_list = [['Afghanistan', 1952, 8425333, 'Asia'],
            ['Australia', 1957, 9712569, 'Oceania'],
            ['Brazil', 1962, 76039390, 'Americas'],
            ['China', 1957, 637408000, 'Asia'],
            ['France', 1957, 44310863, 'Europe'],
            ['India', 1952, 3.72e+08, 'Asia'],
            ['South Africa', 1966, 0.0, 'Africa'],
            ['United States', 1957, 171984000, 'Americas']]

# creating a Pandas DataFrame
df = pd.DataFrame(age_list, columns=['Country', 'Year',
                                     'Population', 'Continent'])
print("Original DataFrame ...\n", df)

# Each example re-sorts the result of the previous one. sort_values() returns
# a new DataFrame, so the result is rebound to `df` instead of using inplace=True.

#### ASCENDING EXAMPLE ####
# Sorting the DataFrame in ascending order (the default) by column 'Continent'
df = df.sort_values(by=['Continent'])
# print("\nDF sorted by Continent ...\n", df)


#### DESCENDING EXAMPLE ####
# Sorting the DataFrame in descending order by column 'Country'
df = df.sort_values(by=['Country'], ascending=False)
# print("\nDF sorted by Country descending ...\n", df)


#### MISSING VALUES EXAMPLE ####
# Sorting by column 'Population' with missing values placed first.
# Note: this sample data has no missing values (South Africa's 0.0 is a real
# number, not NaN), so na_position makes no visible difference here.
df = df.sort_values(by=['Population'], na_position='first')
# print("\nDF sorted by missing values first ...\n", df)


#### MULTI COLUMN SORT EXAMPLE ####
# Sorting by multiple columns -- 'Continent' first, then 'Country' within each continent
df = df.sort_values(by=['Continent', 'Country'])
# print("\nDF sorting multiple columns ...\n", df)


#### EXAMPLE SORT MULTI COLUMNS IN DIFFERENT ORDER ####
# Sorting by multiple columns with a different direction per column:
# 'Country' descending, then 'Continent' ascending.
# `ascending` must have exactly one entry per column listed in `by`.
df = df.sort_values(by=['Country', 'Continent'],
                    ascending=[False, True])
print("\nDF sorting multiple columns in different order ...\n", df)

Original DataFrame ...
          Country  Year   Population Continent
0    Afghanistan  1952    8425333.0      Asia
1      Australia  1957    9712569.0   Oceania
2         Brazil  1962   76039390.0  Americas
3          China  1957  637408000.0      Asia
4         France  1957   44310863.0    Europe
5          India  1952  372000000.0      Asia
6   South Africa  1966          0.0    Africa
7  United States  1957  171984000.0  Americas


ValueError: Length of ascending (2) != length of by (1)

In [29]:
# Import pandas package
import pandas as pd

# Student records, keyed by column name
data = {
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Height': [5.1, 6.2, 5.1, 5.2],
    'Qualification': ['Msc', 'MA', 'Msc', 'Msc'],
}

# Build the DataFrame from the dictionary
df = pd.DataFrame(data)
print("Original DataFrame ...\n", df)


#### LIST AS COLUMN EXAMPLE ####
# Assigning a list to a new column label adds it as the last column
address = ['Delhi', 'Bangalore', 'Chennai', 'Patna']
df['Address'] = address
# print("\nDF with column from list ...\n", df)


#### INSERT EXAMPLE ####
# DataFrame.insert() adds the column in place at a chosen position (here index 2)
df.insert(loc=2, column="Age", value=[21, 23, 24, 21], allow_duplicates=True)
# print("\nDF with insert as column 2 ...\n", df)


#### ASSIGN EXAMPLE ####
# assign() returns a new DataFrame with the extra 'Pets' column, so df is rebound
pets = ['Dog', 'Bunny', 'Chinchilla', 'Parrot']
df = df.assign(Pets=pets)
# print("\nDF with assigned column added ...\n", df)


#### DICTIONARY EXAMPLE ####
# Dictionary whose keys are values of an existing column ('Name') and whose
# values are the entries for the new column
sport = {
    'Jai': 'Darts',
    'Princi': 'Basketball',
    'Gaurav': 'PaddleBoarding',
    'Anuj': 'Cricket',
}

# map() looks up each 'Name' in the dictionary to fill the new 'Sport' column
df['Sport'] = df['Name'].map(sport)
print("\nDF with new column from dictionary ...\n", df)

Original DataFrame ...
      Name  Height Qualification
0     Jai     5.1           Msc
1  Princi     6.2            MA
2  Gaurav     5.1           Msc
3    Anuj     5.2           Msc

DF with new column from dictionary ...
      Name  Height  Age Qualification    Address        Pets           Sport
0     Jai     5.1   21           Msc      Delhi         Dog           Darts
1  Princi     6.2   23            MA  Bangalore       Bunny      Basketball
2  Gaurav     5.1   24           Msc    Chennai  Chinchilla  PaddleBoarding
3    Anuj     5.2   21           Msc      Patna      Parrot         Cricket
