Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 212 lines (187 sloc) 7.92 KB
#!/usr/bin/python
# split-pcap.py begins on the previous line
#
# This script will take a pcap file and split into multiple files, one per
# TCP 4-tuple. If the 4-tuple is reused for multiple connections all
# connections will be in the 1 file. The output file name will be the original
# file name stripped of any directories followed by
# _IP1-Port1_IP2-Port2_split.pcap
# The files are written into the current directory.
#
# The script will exit with a warning if it detects files with the same
# file name as the original file and the suffix _split.pcap. This is because
# the output files are written in append mode so if the script is run over
# the same file twice it will just add the packets to the existing files
# and you end up with duplicates.
# This script only supports pcap formatted files. See the usage message for
# suggestions on converting pcapng files --
# or try googling "convert pcapng to pcap"
#
# This script requires the modules sys, re (regular expression), logging, glob
# and scapy. I expect that sys, re, logging, and glob are installed by default.
# You will probably have to install scapy. How will vary depending on your
# OS and what is already installed. You can try:
# pip install scapy
# or apt-get install python-scapy
# or rpm install scapy (from the EPEL repo)
#
# Usage:
# split-pcap.py file-name [packet-count]
# Where:
# file-name is the name or path of the file to be processed. This script
# only supports pcap files, not pcapng. The following commands can be
# used to convert a pcapng file to pcap format
# tcpdump -r file.pcapng -w new-file.pcap
# or
# tshark -r file.pcapng -w new-file.pcap -F libpcap
# You can also use editcap with the -F libpcap argument
#
# packet-count is optional and is the total number of packets (any protocol)
# in the file. If packet-count is provided the script will write a line
# "Percentage done: " for every integer increase in the percentage of
# the packets processed. If you get the number wrong the percentages
# will be off but you can multiply the count by 10 to get a line every
# 10 percent instead of every 1 percent. If packet-count is not provided
# the script will write a line with the packet count of the packet
# being processed.
# Version 1.0 July 9, 2017
# Version 1.1 July 9, 2017
# Corrected handling of the file name prefix to strip off any directory
# path character to leave just the file name. Also the detection of
# already existing files to check the current directory and not the input
# file source directory.
# Version 1.2 July 13, 2017
# Correctly write the link type for pcaps containing Cooked Linux frames;
# abort for anything other than Ethernet and CookedLinux
#
# from https://github.com/noahdavids/packet-analysis.git
#
# Copyright (C) 2017 Noah Davids
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, version 3, https://www.gnu.org/licenses/gpl-3.0.html
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
import sys
import re
import glob
# This is needed to suppress a really irritating warning message when scapy
# is imported
import logging
logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
# scapy is the only third-party dependency. If it is missing, tell the user
# and stop with a non-zero exit status so calling shells can detect the
# failure.
try:
    # The star import is kept deliberately: the rest of the script uses the
    # scapy names (PcapReader, PcapWriter, TCP, IP, ...) unqualified.
    from scapy.all import *
except ImportError:
    # print() with a single string argument behaves the same on Python 2
    # and Python 3.
    print("scapy is not installed. See comments for installation suggestions")
    sys.exit(1)
# Link-layer header type values written into the output pcap file header.
LINKTYPE_ETHERNET = 1
LINKTYPE_LINUX_SLL = 113


def parse_arguments(argv):
    """Validate the command line and return (file_name, total_packets).

    argv is the full argument vector (argv[0] is the script name). Exactly
    one or two arguments are accepted: the capture file name and an optional
    integer packet count. total_packets is 0 when no count was given.

    Prints a message and exits with status 1 when the argument count is wrong
    or the packet count is not an integer.
    """
    if len(argv) < 2 or len(argv) > 3:
        print("Usage is: split-pcap.py file-name [packet-count]")
        print("Try\n grep -A 20 Usage: " + argv[0] +
              " | head -20\nfor details")
        sys.exit(1)

    total_packets = 0
    if len(argv) == 3:
        try:
            total_packets = int(argv[2])
        except ValueError:
            print("The second argument must be an integer <" +
                  argv[2] + "> does not appear to be an integer")
            sys.exit(1)
    return argv[1], total_packets


def open_capture(file_name):
    """Open file_name with scapy's PcapReader and return the reader.

    Prints a message and exits with status 1 when the file cannot be opened
    or is not a capture file scapy can read.
    """
    try:
        return PcapReader(file_name)
    except IOError:
        print("It doesn't look like " + file_name + " exists")
        sys.exit(1)
    except (NameError, Scapy_Exception):
        # NameError is what the original script caught for unreadable files;
        # Scapy_Exception is added because scapy reports an unsupported
        # capture format with it -- NOTE(review): confirm against the scapy
        # version in use.
        print("It doesn't look like " + file_name +
              " is a file that can be processed.")
        print("Note that this script cannot process pcapng files. Review the ")
        print("usage details for ideas on how to convert from pcapng to pcap")
        sys.exit(1)


def output_prefix(file_name):
    """Return the output file name prefix: the base name of file_name + "_".

    Everything up to and including the last "/" is dropped; a name without
    any "/" is used as is.
    """
    return file_name.rpartition("/")[2] + "_"


def abort_if_outputs_exist(prefix):
    """Exit with status 1 if prefix*_split.pcap files already exist.

    Output files are opened in append mode, so running the script twice over
    the same capture would add duplicate packets to the existing files.
    """
    # Import the module under a private local name: the file-level name
    # "glob" may be rebound by "from scapy.all import *", so it is not safe
    # to rely on it being either the module or the function.
    import glob as glob_module

    existing = len(glob_module.glob(prefix + "*_split.pcap"))
    if existing > 0:
        print("There are already " + str(existing) +
              " files with the name " + prefix + "*_split.pcap.")
        print("Delete or rename them or change to a different directory to")
        print("avoid adding duplicate packets into the " + prefix +
              "*_split.pcap trace files.")
        sys.exit(1)


def flow_file_name(prefix, ip_src, tcp_sport, ip_dst, tcp_dport):
    """Return the output file name for one TCP 4-tuple.

    The two endpoints are put into a canonical order (the larger
    (address, port) pair first; addresses compare as strings, ports as
    integers) so that packets travelling in either direction of a connection
    map to the same file name.
    """
    if (ip_src, tcp_sport) > (ip_dst, tcp_dport):
        first, second = (ip_src, tcp_sport), (ip_dst, tcp_dport)
    else:
        first, second = (ip_dst, tcp_dport), (ip_src, tcp_sport)
    return (prefix + first[0] + "-" + str(first[1]) + "_" +
            second[0] + "-" + str(second[1]) + "_split.pcap")


def link_type_for(packet):
    """Return the pcap link type for packet's outermost layer.

    Only Ethernet and Linux cooked frames are supported; anything else
    prints a message and exits with status 1.
    """
    if isinstance(packet, Ether):
        return LINKTYPE_ETHERNET
    if isinstance(packet, CookedLinux):
        return LINKTYPE_LINUX_SLL
    print("Unknown link type: " + str(type(packet)) + " -- exiting")
    sys.exit(1)


def progress_percentage(packet_count, total_packets):
    """Return the whole-number percentage of packets processed so far.

    Floor division keeps the result an integer on both Python 2 and 3.
    total_packets must be greater than zero.
    """
    return packet_count * 100 // total_packets


def split_by_flow(pcap_in, prefix, total_packets):
    """Write every IPv4 TCP packet from pcap_in to its per-flow output file.

    pcap_in is an iterable of scapy packets, prefix is the output file name
    prefix and total_packets is the user-supplied packet count (0 when not
    given). Progress is printed either as a percentage, one line per integer
    increase, or as the running packet count when total_packets is 0.
    Returns the number of packets read.
    """
    current_name = ""
    pcap_out = None
    packet_count = 0
    last_percentage = -1
    try:
        for packet in pcap_in:
            packet_count += 1

            # NOTE(review): the source this was recovered from had lost its
            # indentation; following the header description only TCP packets
            # are written. Requiring an IP layer as well avoids a failure on
            # TCP carried over anything other than IPv4 (such packets are
            # counted but not written).
            if TCP in packet and IP in packet:
                out_name = flow_file_name(prefix,
                                          packet[IP].src, packet[TCP].sport,
                                          packet[IP].dst, packet[TCP].dport)
                # Switch output files only when the flow changes. Files are
                # opened for append so a flow can be revisited later.
                if out_name != current_name:
                    if pcap_out is not None:
                        pcap_out.close()
                        pcap_out = None
                    pcap_out = PcapWriter(out_name,
                                          linktype=link_type_for(packet),
                                          append=True)
                    current_name = out_name
                pcap_out.write(packet)

            if total_packets > 0:
                percentage = progress_percentage(packet_count, total_packets)
                if percentage > last_percentage:
                    print("Percentage done: " + str(percentage))
                    last_percentage = percentage
            else:
                print(packet_count)
    finally:
        # Make sure the last output file is flushed and closed.
        if pcap_out is not None:
            pcap_out.close()
    return packet_count


def main(argv):
    """Run the script: parse argv, open the capture and split it by flow."""
    file_name, total_packets = parse_arguments(argv)
    pcap_in = open_capture(file_name)
    try:
        prefix = output_prefix(file_name)
        abort_if_outputs_exist(prefix)
        split_by_flow(pcap_in, prefix, total_packets)
    finally:
        pcap_in.close()


if __name__ == "__main__":
    main(sys.argv)
#
# split-pcap.py ends here